Fangjun Kuang
Committed by GitHub

Refactor the JNI interface to make it more modular and maintainable (#802)

Showing 98 changed files with 1,156 additions and 972 deletions.

Too many changes to show: only 98 of 98+ files are displayed to preserve performance.
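Summary: the old all-in-one `SherpaOnnx` / `SherpaOnnxOffline` / `SherpaOnnxKws` Kotlin wrappers are replaced by per-task classes (`OnlineRecognizer`, `OfflineRecognizer`, `KeywordSpotter`) plus explicit `OnlineStream` / `OfflineStream` objects, kept once under `sherpa-onnx/kotlin-api/` and symlinked into each demo app. A minimal sketch of the new call pattern, pieced together from the hunks below (`config` and `samples` are placeholders, not part of this commit):

    // Sketch of the refactored streaming API; decoding state now lives in a
    // separate OnlineStream instead of inside the recognizer itself.
    val recognizer = OnlineRecognizer(
        assetManager = application.assets,
        config = config, // an OnlineRecognizerConfig; see the demo apps below
    )
    val stream = recognizer.createStream()
    stream.acceptWaveform(samples, sampleRate = 16000)   // was: model.acceptWaveform(...)
    while (recognizer.isReady(stream)) {                 // was: model.isReady()
        recognizer.decode(stream)                        // was: model.decode()
    }
    val text = recognizer.getResult(stream).text         // was: model.text
    if (recognizer.isEndpoint(stream)) {                 // was: model.isEndpoint()
        recognizer.reset(stream)                         // was: model.reset()
    }
    stream.release()                                     // streams are released explicitly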

+name: apk-asr
+
+on:
+  push:
+    tags:
+      - '*'
+
+  workflow_dispatch:
+
+concurrency:
+  group: apk-asr-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  contents: write
+
+jobs:
+  apk_asr:
+    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
+    runs-on: ${{ matrix.os }}
+    name: apk for asr ${{ matrix.index }}/${{ matrix.total }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+        total: ["1"]
+        index: ["0"]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      # https://github.com/actions/setup-java
+      - uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin' # See 'Supported distributions' for available options
+          java-version: '21'
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ matrix.os }}-android
+
+      - name: Display NDK HOME
+        shell: bash
+        run: |
+          echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}"
+          ls -lh ${ANDROID_NDK_LATEST_HOME}
+
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          python3 -m pip install --upgrade pip jinja2
+
+      - name: Setup build tool version variable
+        shell: bash
+        run: |
+          echo "---"
+          ls -lh /usr/local/lib/android/
+          echo "---"
+
+          ls -lh /usr/local/lib/android/sdk
+          echo "---"
+
+          ls -lh /usr/local/lib/android/sdk/build-tools
+          echo "---"
+
+          BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1)
+          echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV
+          echo "Last build tool version is: $BUILD_TOOL_VERSION"
+
+      - name: Generate build script
+        shell: bash
+        run: |
+          cd scripts/apk
+
+          total=${{ matrix.total }}
+          index=${{ matrix.index }}
+
+          ./generate-asr-apk-script.py --total $total --index $index
+
+          chmod +x build-apk-asr.sh
+          mv -v ./build-apk-asr.sh ../..
+
+      - name: build APK
+        shell: bash
+        run: |
+          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+          cmake --version
+
+          export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME
+          ./build-apk-asr.sh
+
+      - name: Display APK
+        shell: bash
+        run: |
+          ls -lh ./apks/
+          du -h -d1 .
+
+      # https://github.com/marketplace/actions/sign-android-release
+      - uses: r0adkll/sign-android-release@v1
+        name: Sign app APK
+        with:
+          releaseDirectory: ./apks
+          signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }}
+          alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }}
+          keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }}
+        env:
+          BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }}
+
+      - name: Display APK after signing
+        shell: bash
+        run: |
+          ls -lh ./apks/
+          du -h -d1 .
+
+      - name: Rename APK after signing
+        shell: bash
+        run: |
+          cd apks
+          rm -fv signingKey.jks
+          rm -fv *.apk.idsig
+          rm -fv *-aligned.apk
+
+          all_apks=$(ls -1 *-signed.apk)
+          echo "----"
+          echo $all_apks
+          echo "----"
+          for apk in ${all_apks[@]}; do
+            n=$(echo $apk | sed -e s/-signed//)
+            mv -v $apk $n
+          done
+
+          cd ..
+
+          ls -lh ./apks/
+          du -h -d1 .
+
+      - name: Display APK after rename
+        shell: bash
+        run: |
+          ls -lh ./apks/
+          du -h -d1 .
+
+      - name: Publish to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v3
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf huggingface
+            export GIT_LFS_SKIP_SMUDGE=1
+
+            git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface
+            cd huggingface
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+
+            mkdir -p asr
+            cp -v ../apks/*.apk ./asr/
+            git status
+            git lfs track "*.apk"
+            git add .
+            git commit -m "add more apks"
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main
@@ -95,3 +95,4 @@ sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12
 spoken-language-identification-test-wavs
 my-release-key*
 vits-zh-hf-fanchen-C
+sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
@@ -16,6 +16,7 @@
         tools:targetApi="31">
         <activity
             android:name=".MainActivity"
+            android:label="ASR: Next-gen Kaldi"
             android:exported="true">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
+../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
@@ -12,16 +12,19 @@ import android.widget.Button
 import android.widget.TextView
 import androidx.appcompat.app.AppCompatActivity
 import androidx.core.app.ActivityCompat
-import com.k2fsa.sherpa.onnx.*
 import kotlin.concurrent.thread
 
 private const val TAG = "sherpa-onnx"
 private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
 
+// To enable the microphone in the Android emulator, use
+//
+// adb emu avd hostmicon
+
 class MainActivity : AppCompatActivity() {
     private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
 
-    private lateinit var model: SherpaOnnx
+    private lateinit var recognizer: OnlineRecognizer
     private var audioRecord: AudioRecord? = null
     private lateinit var recordButton: Button
     private lateinit var textView: TextView
@@ -87,7 +90,6 @@ class MainActivity : AppCompatActivity() {
             audioRecord!!.startRecording()
             recordButton.setText(R.string.stop)
             isRecording = true
-            model.reset(true)
             textView.text = ""
             lastText = ""
             idx = 0
@@ -108,6 +110,7 @@ class MainActivity : AppCompatActivity() {
 
     private fun processSamples() {
         Log.i(TAG, "processing samples")
+        val stream = recognizer.createStream()
 
         val interval = 0.1 // i.e., 100 ms
         val bufferSize = (interval * sampleRateInHz).toInt() // in samples
@@ -117,29 +120,41 @@ class MainActivity : AppCompatActivity() {
             val ret = audioRecord?.read(buffer, 0, buffer.size)
             if (ret != null && ret > 0) {
                 val samples = FloatArray(ret) { buffer[it] / 32768.0f }
-                model.acceptWaveform(samples, sampleRate=sampleRateInHz)
-                while (model.isReady()) {
-                    model.decode()
+                stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
+                while (recognizer.isReady(stream)) {
+                    recognizer.decode(stream)
                 }
 
-                val isEndpoint = model.isEndpoint()
-                val text = model.text
+                val isEndpoint = recognizer.isEndpoint(stream)
+                var text = recognizer.getResult(stream).text
+
+                // For streaming paraformer, we need to manually add some
+                // paddings so that it has enough right context to
+                // recognize the last word of this segment
+                if (isEndpoint && recognizer.config.modelConfig.paraformer.encoder.isNotBlank()) {
+                    val tailPaddings = FloatArray((0.8 * sampleRateInHz).toInt())
+                    stream.acceptWaveform(tailPaddings, sampleRate = sampleRateInHz)
+                    while (recognizer.isReady(stream)) {
+                        recognizer.decode(stream)
+                    }
+                    text = recognizer.getResult(stream).text
+                }
 
-                var textToDisplay = lastText;
+                var textToDisplay = lastText
 
-                if(text.isNotBlank()) {
-                    if (lastText.isBlank()) {
-                        textToDisplay = "${idx}: ${text}"
+                if (text.isNotBlank()) {
+                    textToDisplay = if (lastText.isBlank()) {
+                        "${idx}: $text"
                     } else {
-                        textToDisplay = "${lastText}\n${idx}: ${text}"
+                        "${lastText}\n${idx}: $text"
                     }
                 }
 
                 if (isEndpoint) {
-                    model.reset()
+                    recognizer.reset(stream)
                     if (text.isNotBlank()) {
-                        lastText = "${lastText}\n${idx}: ${text}"
-                        textToDisplay = lastText;
+                        lastText = "${lastText}\n${idx}: $text"
+                        textToDisplay = lastText
                         idx += 1
                     }
                 }
@@ -149,6 +164,7 @@ class MainActivity : AppCompatActivity() {
                 }
             }
         }
+        stream.release()
     }
 
     private fun initMicrophone(): Boolean {
@@ -180,7 +196,7 @@ class MainActivity : AppCompatActivity() {
         // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
         // for a list of available models
         val type = 0
-        println("Select model type ${type}")
+        Log.i(TAG, "Select model type $type")
         val config = OnlineRecognizerConfig(
             featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
             modelConfig = getModelConfig(type = type)!!,
@@ -189,7 +205,7 @@ class MainActivity : AppCompatActivity() {
             enableEndpoint = true,
         )
 
-        model = SherpaOnnx(
+        recognizer = OnlineRecognizer(
             assetManager = application.assets,
             config = config,
         )
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
+../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt
@@ -16,6 +16,7 @@
         tools:targetApi="31">
         <activity
             android:name=".MainActivity"
+            android:label="2pass ASR: Next-gen Kaldi"
             android:exported="true">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
@@ -29,4 +30,4 @@
         </activity>
     </application>
 
-</manifest>
+</manifest>
+../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
@@ -17,11 +17,13 @@ import kotlin.concurrent.thread
 private const val TAG = "sherpa-onnx"
 private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
 
+// adb emu avd hostmicon
+// to enable the microphone inside the emulator
 class MainActivity : AppCompatActivity() {
     private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
 
-    private lateinit var onlineRecognizer: SherpaOnnx
-    private lateinit var offlineRecognizer: SherpaOnnxOffline
+    private lateinit var onlineRecognizer: OnlineRecognizer
+    private lateinit var offlineRecognizer: OfflineRecognizer
     private var audioRecord: AudioRecord? = null
     private lateinit var recordButton: Button
     private lateinit var textView: TextView
@@ -93,7 +95,6 @@ class MainActivity : AppCompatActivity() {
             audioRecord!!.startRecording()
             recordButton.setText(R.string.stop)
             isRecording = true
-            onlineRecognizer.reset(true)
             samplesBuffer.clear()
             textView.text = ""
             lastText = ""
@@ -115,6 +116,7 @@ class MainActivity : AppCompatActivity() {
 
     private fun processSamples() {
         Log.i(TAG, "processing samples")
+        val stream = onlineRecognizer.createStream()
 
         val interval = 0.1 // i.e., 100 ms
         val bufferSize = (interval * sampleRateInHz).toInt() // in samples
@@ -126,29 +128,29 @@ class MainActivity : AppCompatActivity() {
                 val samples = FloatArray(ret) { buffer[it] / 32768.0f }
                 samplesBuffer.add(samples)
 
-                onlineRecognizer.acceptWaveform(samples, sampleRate = sampleRateInHz)
-                while (onlineRecognizer.isReady()) {
-                    onlineRecognizer.decode()
+                stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
+                while (onlineRecognizer.isReady(stream)) {
+                    onlineRecognizer.decode(stream)
                 }
-                val isEndpoint = onlineRecognizer.isEndpoint()
+                val isEndpoint = onlineRecognizer.isEndpoint(stream)
                 var textToDisplay = lastText
 
-                var text = onlineRecognizer.text
+                var text = onlineRecognizer.getResult(stream).text
                 if (text.isNotBlank()) {
-                    if (lastText.isBlank()) {
+                    textToDisplay = if (lastText.isBlank()) {
                         // textView.text = "${idx}: ${text}"
-                        textToDisplay = "${idx}: ${text}"
+                        "${idx}: $text"
                     } else {
-                        textToDisplay = "${lastText}\n${idx}: ${text}"
+                        "${lastText}\n${idx}: $text"
                     }
                 }
 
                 if (isEndpoint) {
-                    onlineRecognizer.reset()
+                    onlineRecognizer.reset(stream)
 
                     if (text.isNotBlank()) {
                         text = runSecondPass()
-                        lastText = "${lastText}\n${idx}: ${text}"
+                        lastText = "${lastText}\n${idx}: $text"
                         idx += 1
                     } else {
                         samplesBuffer.clear()
@@ -160,6 +162,7 @@ class MainActivity : AppCompatActivity() {
                 }
             }
         }
+        stream.release()
     }
 
     private fun initMicrophone(): Boolean {
@@ -190,8 +193,8 @@ class MainActivity : AppCompatActivity() {
         // Please change getModelConfig() to add new models
         // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
         // for a list of available models
-        val firstType = 1
-        println("Select model type ${firstType} for the first pass")
+        val firstType = 9
+        Log.i(TAG, "Select model type $firstType for the first pass")
         val config = OnlineRecognizerConfig(
             featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
             modelConfig = getModelConfig(type = firstType)!!,
@@ -199,7 +202,7 @@ class MainActivity : AppCompatActivity() {
             enableEndpoint = true,
         )
 
-        onlineRecognizer = SherpaOnnx(
+        onlineRecognizer = OnlineRecognizer(
             assetManager = application.assets,
             config = config,
         )
@@ -209,15 +212,15 @@ class MainActivity : AppCompatActivity() {
         // Please change getOfflineModelConfig() to add new models
         // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
         // for a list of available models
-        val secondType = 1
-        println("Select model type ${secondType} for the second pass")
+        val secondType = 0
+        Log.i(TAG, "Select model type $secondType for the second pass")
 
         val config = OfflineRecognizerConfig(
             featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
             modelConfig = getOfflineModelConfig(type = secondType)!!,
         )
 
-        offlineRecognizer = SherpaOnnxOffline(
+        offlineRecognizer = OfflineRecognizer(
             assetManager = application.assets,
             config = config,
         )
@@ -244,8 +247,15 @@ class MainActivity : AppCompatActivity() {
         val n = maxOf(0, samples.size - 8000)
 
         samplesBuffer.clear()
-        samplesBuffer.add(samples.sliceArray(n..samples.size-1))
+        samplesBuffer.add(samples.sliceArray(n until samples.size))
 
-        return offlineRecognizer.decode(samples.sliceArray(0..n), sampleRateInHz)
+        val stream = offlineRecognizer.createStream()
+        stream.acceptWaveform(samples.sliceArray(0..n), sampleRateInHz)
+        offlineRecognizer.decode(stream)
+        val result = offlineRecognizer.getResult(stream)
+
+        stream.release()
+
+        return result.text
     }
 }
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
+../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
-package com.k2fsa.sherpa.onnx
-
-import android.content.res.AssetManager
-
-data class EndpointRule(
-    var mustContainNonSilence: Boolean,
-    var minTrailingSilence: Float,
-    var minUtteranceLength: Float,
-)
-
-data class EndpointConfig(
-    var rule1: EndpointRule = EndpointRule(false, 2.0f, 0.0f),
-    var rule2: EndpointRule = EndpointRule(true, 1.2f, 0.0f),
-    var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f)
-)
-
-data class OnlineTransducerModelConfig(
-    var encoder: String = "",
-    var decoder: String = "",
-    var joiner: String = "",
-)
-
-data class OnlineParaformerModelConfig(
-    var encoder: String = "",
-    var decoder: String = "",
-)
-
-data class OnlineZipformer2CtcModelConfig(
-    var model: String = "",
-)
-
-data class OnlineModelConfig(
-    var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(),
-    var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
-    var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(),
-    var tokens: String,
-    var numThreads: Int = 1,
-    var debug: Boolean = false,
-    var provider: String = "cpu",
-    var modelType: String = "",
-)
-
-data class OnlineLMConfig(
-    var model: String = "",
-    var scale: Float = 0.5f,
-)
-
-data class FeatureConfig(
-    var sampleRate: Int = 16000,
-    var featureDim: Int = 80,
-)
-
-data class OnlineRecognizerConfig(
-    var featConfig: FeatureConfig = FeatureConfig(),
-    var modelConfig: OnlineModelConfig,
-    var lmConfig: OnlineLMConfig = OnlineLMConfig(),
-    var endpointConfig: EndpointConfig = EndpointConfig(),
-    var enableEndpoint: Boolean = true,
-    var decodingMethod: String = "greedy_search",
-    var maxActivePaths: Int = 4,
-    var hotwordsFile: String = "",
-    var hotwordsScore: Float = 1.5f,
-)
-
-data class OfflineTransducerModelConfig(
-    var encoder: String = "",
-    var decoder: String = "",
-    var joiner: String = "",
-)
-
-data class OfflineParaformerModelConfig(
-    var model: String = "",
-)
-
-data class OfflineWhisperModelConfig(
-    var encoder: String = "",
-    var decoder: String = "",
-    var language: String = "en", // Used with multilingual model
-    var task: String = "transcribe", // transcribe or translate
-    var tailPaddings: Int = 1000, // Padding added at the end of the samples
-)
-
-data class OfflineModelConfig(
-    var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(),
-    var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(),
-    var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(),
-    var numThreads: Int = 1,
-    var debug: Boolean = false,
-    var provider: String = "cpu",
-    var modelType: String = "",
-    var tokens: String,
-)
-
-data class OfflineRecognizerConfig(
-    var featConfig: FeatureConfig = FeatureConfig(),
-    var modelConfig: OfflineModelConfig,
-    // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it
-    var decodingMethod: String = "greedy_search",
-    var maxActivePaths: Int = 4,
-    var hotwordsFile: String = "",
-    var hotwordsScore: Float = 1.5f,
-)
-
-class SherpaOnnx(
-    assetManager: AssetManager? = null,
-    var config: OnlineRecognizerConfig,
-) {
-    private val ptr: Long
-
-    init {
-        if (assetManager != null) {
-            ptr = new(assetManager, config)
-        } else {
-            ptr = newFromFile(config)
-        }
-    }
-
-    protected fun finalize() {
-        delete(ptr)
-    }
-
-    fun acceptWaveform(samples: FloatArray, sampleRate: Int) =
-        acceptWaveform(ptr, samples, sampleRate)
-
-    fun inputFinished() = inputFinished(ptr)
-    fun reset(recreate: Boolean = false, hotwords: String = "") = reset(ptr, recreate, hotwords)
-    fun decode() = decode(ptr)
-    fun isEndpoint(): Boolean = isEndpoint(ptr)
-    fun isReady(): Boolean = isReady(ptr)
-
-    val text: String
-        get() = getText(ptr)
-
-    val tokens: Array<String>
-        get() = getTokens(ptr)
-
-    private external fun delete(ptr: Long)
-
-    private external fun new(
-        assetManager: AssetManager,
-        config: OnlineRecognizerConfig,
-    ): Long
-
-    private external fun newFromFile(
-        config: OnlineRecognizerConfig,
-    ): Long
-
-    private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int)
-    private external fun inputFinished(ptr: Long)
-    private external fun getText(ptr: Long): String
-    private external fun reset(ptr: Long, recreate: Boolean, hotwords: String)
-    private external fun decode(ptr: Long)
-    private external fun isEndpoint(ptr: Long): Boolean
-    private external fun isReady(ptr: Long): Boolean
-    private external fun getTokens(ptr: Long): Array<String>
-
-    companion object {
-        init {
-            System.loadLibrary("sherpa-onnx-jni")
-        }
-    }
-}
-
-class SherpaOnnxOffline(
-    assetManager: AssetManager? = null,
-    var config: OfflineRecognizerConfig,
-) {
-    private val ptr: Long
-
-    init {
-        if (assetManager != null) {
-            ptr = new(assetManager, config)
-        } else {
-            ptr = newFromFile(config)
-        }
-    }
-
-    protected fun finalize() {
-        delete(ptr)
-    }
-
-    fun decode(samples: FloatArray, sampleRate: Int) = decode(ptr, samples, sampleRate)
-
-    private external fun delete(ptr: Long)
-
-    private external fun new(
-        assetManager: AssetManager,
-        config: OfflineRecognizerConfig,
-    ): Long
-
-    private external fun newFromFile(
-        config: OfflineRecognizerConfig,
-    ): Long
-
-    private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String
-
-    companion object {
-        init {
-            System.loadLibrary("sherpa-onnx-jni")
-        }
-    }
-}
-
-fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig {
-    return FeatureConfig(sampleRate = sampleRate, featureDim = featureDim)
-}
-
-/*
-Please see
-https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
-for a list of pre-trained models.
-
-We only add a few here. Please change the following code
-to add your own. (It should be straightforward to add a new model
-by following the code)
-
-@param type
-0 - csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese)
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23
-    encoder/joiner int8, decoder float32
-
-1 - csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English)
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english
-    encoder/joiner int8, decoder fp32
-
- */
-fun getModelConfig(type: Int): OnlineModelConfig? {
-    when (type) {
-        0 -> {
-            val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23"
-            return OnlineModelConfig(
-                transducer = OnlineTransducerModelConfig(
-                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
-                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
-                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
-                ),
-                tokens = "$modelDir/tokens.txt",
-                modelType = "zipformer",
-            )
-        }
-
-        1 -> {
-            val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17"
-            return OnlineModelConfig(
-                transducer = OnlineTransducerModelConfig(
-                    encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx",
-                    decoder = "$modelDir/decoder-epoch-99-avg-1.onnx",
-                    joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx",
-                ),
-                tokens = "$modelDir/tokens.txt",
-                modelType = "zipformer",
-            )
-        }
-    }
-    return null
-}
-
-/*
-Please see
-https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
-for a list of pre-trained models.
-
-We only add a few here. Please change the following code
-to add your own LM model. (It should be straightforward to train a new NN LM model
-by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py)
-
-@param type
-0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english
- */
-fun getOnlineLMConfig(type: Int): OnlineLMConfig {
-    when (type) {
-        0 -> {
-            val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"
-            return OnlineLMConfig(
-                model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx",
-                scale = 0.5f,
-            )
-        }
-    }
-    return OnlineLMConfig()
-}
-
-// for English models, use a small value for rule2.minTrailingSilence, e.g., 0.8
-fun getEndpointConfig(): EndpointConfig {
-    return EndpointConfig(
-        rule1 = EndpointRule(false, 2.4f, 0.0f),
-        rule2 = EndpointRule(true, 0.8f, 0.0f),
-        rule3 = EndpointRule(false, 0.0f, 20.0f)
-    )
-}
-
-/*
-Please see
-https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
-for a list of pre-trained models.
-
-We only add a few here. Please change the following code
-to add your own. (It should be straightforward to add a new model
-by following the code)
-
-@param type
-
-0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese)
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese
-    int8
-
-1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English)
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english
-    encoder int8, decoder/joiner float32
-
-2 - sherpa-onnx-whisper-tiny.en
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
-    encoder int8, decoder int8
-
-3 - sherpa-onnx-whisper-base.en
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en
-    encoder int8, decoder int8
-
-4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese)
-    https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese
-    encoder/joiner int8, decoder fp32
-
- */
-fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
-    when (type) {
-        0 -> {
-            val modelDir = "sherpa-onnx-paraformer-zh-2023-03-28"
-            return OfflineModelConfig(
-                paraformer = OfflineParaformerModelConfig(
-                    model = "$modelDir/model.int8.onnx",
-                ),
-                tokens = "$modelDir/tokens.txt",
-                modelType = "paraformer",
-            )
-        }
-
-        1 -> {
-            val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04"
-            return OfflineModelConfig(
-                transducer = OfflineTransducerModelConfig(
-                    encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx",
-                    decoder = "$modelDir/decoder-epoch-30-avg-4.onnx",
-                    joiner = "$modelDir/joiner-epoch-30-avg-4.onnx",
-                ),
-                tokens = "$modelDir/tokens.txt",
-                modelType = "zipformer",
-            )
-        }
-
-        2 -> {
-            val modelDir = "sherpa-onnx-whisper-tiny.en"
-            return OfflineModelConfig(
-                whisper = OfflineWhisperModelConfig(
-                    encoder = "$modelDir/tiny.en-encoder.int8.onnx",
-                    decoder = "$modelDir/tiny.en-decoder.int8.onnx",
-                ),
-                tokens = "$modelDir/tiny.en-tokens.txt",
-                modelType = "whisper",
-            )
-        }
-
-        3 -> {
-            val modelDir = "sherpa-onnx-whisper-base.en"
-            return OfflineModelConfig(
-                whisper = OfflineWhisperModelConfig(
-                    encoder = "$modelDir/base.en-encoder.int8.onnx",
-                    decoder = "$modelDir/base.en-decoder.int8.onnx",
-                ),
-                tokens = "$modelDir/base.en-tokens.txt",
-                modelType = "whisper",
-            )
-        }
-
-
-        4 -> {
-            val modelDir = "icefall-asr-zipformer-wenetspeech-20230615"
-            return OfflineModelConfig(
-                transducer = OfflineTransducerModelConfig(
-                    encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx",
-                    decoder = "$modelDir/decoder-epoch-12-avg-4.onnx",
-                    joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx",
-                ),
-                tokens = "$modelDir/tokens.txt",
-                modelType = "zipformer",
-            )
-        }
-
-        5 -> {
-            val modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2"
-            return OfflineModelConfig(
-                transducer = OfflineTransducerModelConfig(
-                    encoder = "$modelDir/encoder-epoch-20-avg-1.int8.onnx",
-                    decoder = "$modelDir/decoder-epoch-20-avg-1.onnx",
-                    joiner = "$modelDir/joiner-epoch-20-avg-1.int8.onnx",
-                ),
-                tokens = "$modelDir/tokens.txt",
-                modelType = "zipformer2",
-            )
-        }
-
-    }
-    return null
-}
-package com.k2fsa.sherpa.onnx
-
-import android.content.res.AssetManager
-
-class WaveReader {
-    companion object {
-        // Read a mono wave file asset
-        // The returned array has two entries:
-        //  - the first entry contains an 1-D float array
-        //  - the second entry is the sample rate
-        external fun readWaveFromAsset(
-            assetManager: AssetManager,
-            filename: String,
-        ): Array<Any>
-
-        // Read a mono wave file from disk
-        // The returned array has two entries:
-        //  - the first entry contains an 1-D float array
-        //  - the second entry is the sample rate
-        external fun readWaveFromFile(
-            filename: String,
-        ): Array<Any>
-
-        init {
-            System.loadLibrary("sherpa-onnx-jni")
-        }
-    }
-}
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/AudioTagging.kt
@@ -46,7 +46,6 @@ import androidx.compose.ui.unit.dp
 import androidx.compose.ui.unit.sp
 import androidx.core.app.ActivityCompat
 import com.k2fsa.sherpa.onnx.AudioEvent
-import com.k2fsa.sherpa.onnx.Tagger
 import kotlin.concurrent.thread
 
 
@@ -13,13 +13,14 @@ import androidx.compose.material3.Surface
 import androidx.compose.runtime.Composable
 import androidx.compose.ui.Modifier
 import androidx.core.app.ActivityCompat
-import com.k2fsa.sherpa.onnx.Tagger
 import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme
 
 const val TAG = "sherpa-onnx"
 
 private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
 
+// adb emu avd hostmicon
+// to enable the mic inside the emulator
 class MainActivity : ComponentActivity() {
     private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
     override fun onCreate(savedInstanceState: Bundle?) {
+../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
-package com.k2fsa.sherpa.onnx
+package com.k2fsa.sherpa.onnx.audio.tagging
 
 import android.content.res.AssetManager
 import android.util.Log
+import com.k2fsa.sherpa.onnx.AudioTagging
+import com.k2fsa.sherpa.onnx.getAudioTaggingConfig
 
 
 object Tagger {
@@ -17,7 +19,7 @@ object Tagger {
             return
         }
 
-        Log.i(TAG, "Initializing audio tagger")
+        Log.i("sherpa-onnx", "Initializing audio tagger")
         val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!!
         _tagger = AudioTagging(assetManager, config)
     }
@@ -33,7 +33,7 @@ import androidx.wear.compose.material.Button
 import androidx.wear.compose.material.MaterialTheme
 import androidx.wear.compose.material.Text
 import com.k2fsa.sherpa.onnx.AudioEvent
-import com.k2fsa.sherpa.onnx.Tagger
+import com.k2fsa.sherpa.onnx.audio.tagging.Tagger
 import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme
 import kotlin.concurrent.thread
 
@@ -17,11 +17,14 @@ import androidx.activity.compose.setContent
 import androidx.compose.runtime.Composable
 import androidx.core.app.ActivityCompat
 import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen
-import com.k2fsa.sherpa.onnx.Tagger
+import com.k2fsa.sherpa.onnx.audio.tagging.Tagger
 
 const val TAG = "sherpa-onnx"
 private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
 
+// adb emu avd hostmicon
+// to enable the mic inside the emulator
+
 class MainActivity : ComponentActivity() {
     private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
     override fun onCreate(savedInstanceState: Bundle?) {
@@ -15,7 +15,8 @@
         android:theme="@style/Theme.SherpaOnnx"
         tools:targetApi="31">
         <activity
-            android:name=".MainActivity"
+            android:name=".kws.MainActivity"
+            android:label="Keyword-spotter"
             android:exported="true">
             <intent-filter>
                 <action android:name="android.intent.action.MAIN" />
+../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
+../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt
-package com.k2fsa.sherpa.onnx
+package com.k2fsa.sherpa.onnx.kws
 
 import android.Manifest
 import android.content.pm.PackageManager
@@ -14,7 +14,13 @@ import android.widget.TextView
 import android.widget.Toast
 import androidx.appcompat.app.AppCompatActivity
 import androidx.core.app.ActivityCompat
-import com.k2fsa.sherpa.onnx.*
+import com.k2fsa.sherpa.onnx.KeywordSpotter
+import com.k2fsa.sherpa.onnx.KeywordSpotterConfig
+import com.k2fsa.sherpa.onnx.OnlineStream
+import com.k2fsa.sherpa.onnx.R
+import com.k2fsa.sherpa.onnx.getFeatureConfig
+import com.k2fsa.sherpa.onnx.getKeywordsFile
+import com.k2fsa.sherpa.onnx.getKwsModelConfig
 import kotlin.concurrent.thread
 
 private const val TAG = "sherpa-onnx"
@@ -23,7 +29,8 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200
 class MainActivity : AppCompatActivity() {
     private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
 
-    private lateinit var model: SherpaOnnxKws
+    private lateinit var kws: KeywordSpotter
+    private lateinit var stream: OnlineStream
     private var audioRecord: AudioRecord? = null
     private lateinit var recordButton: Button
     private lateinit var textView: TextView
@@ -87,15 +94,18 @@ class MainActivity : AppCompatActivity() {
 
         Log.i(TAG, keywords)
         keywords = keywords.replace("\n", "/")
+        keywords = keywords.trim()
         // If keywords is an empty string, it just resets the decoding stream
         // always returns true in this case.
         // If keywords is not empty, it will create a new decoding stream with
         // the given keywords appended to the default keywords.
-        // Return false if errors occured when adding keywords, true otherwise.
-        val status = model.reset(keywords)
-        if (!status) {
-            Log.i(TAG, "Failed to reset with keywords.")
-            Toast.makeText(this, "Failed to set keywords.", Toast.LENGTH_LONG).show();
+        // Return false if errors occurred when adding keywords, true otherwise.
+        stream.release()
+        stream = kws.createStream(keywords)
+        if (stream.ptr == 0L) {
+            Log.i(TAG, "Failed to create stream with keywords: $keywords")
+            Toast.makeText(this, "Failed to set keywords to $keywords.", Toast.LENGTH_LONG)
+                .show()
             return
         }
 
@@ -122,6 +132,7 @@ class MainActivity : AppCompatActivity() {
             audioRecord!!.release()
             audioRecord = null
             recordButton.setText(R.string.start)
+            stream.release()
             Log.i(TAG, "Stopped recording")
         }
     }
@@ -137,22 +148,22 @@ class MainActivity : AppCompatActivity() {
             val ret = audioRecord?.read(buffer, 0, buffer.size)
             if (ret != null && ret > 0) {
                 val samples = FloatArray(ret) { buffer[it] / 32768.0f }
-                model.acceptWaveform(samples, sampleRate=sampleRateInHz)
-                while (model.isReady()) {
-                    model.decode()
+                stream.acceptWaveform(samples, sampleRate = sampleRateInHz)
+                while (kws.isReady(stream)) {
+                    kws.decode(stream)
                 }
 
-                val text = model.keyword
+                val text = kws.getResult(stream).keyword
 
-                var textToDisplay = lastText;
+                var textToDisplay = lastText
 
-                if(text.isNotBlank()) {
+                if (text.isNotBlank()) {
                     if (lastText.isBlank()) {
-                        textToDisplay = "${idx}: ${text}"
+                        textToDisplay = "$idx: $text"
                     } else {
-                        textToDisplay = "${idx}: ${text}\n${lastText}"
+                        textToDisplay = "$idx: $text\n$lastText"
                     }
-                    lastText = "${idx}: ${text}\n${lastText}"
+                    lastText = "$idx: $text\n$lastText"
                     idx += 1
                 }
 
@@ -188,20 +199,21 @@ class MainActivity : AppCompatActivity() {
     }
 
     private fun initModel() {
-        // Please change getModelConfig() to add new models
+        // Please change getKwsModelConfig() to add new models
         // See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html
         // for a list of available models
         val type = 0
-        Log.i(TAG, "Select model type ${type}")
+        Log.i(TAG, "Select model type $type")
         val config = KeywordSpotterConfig(
             featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
-            modelConfig = getModelConfig(type = type)!!,
-            keywordsFile = getKeywordsFile(type = type)!!,
+            modelConfig = getKwsModelConfig(type = type)!!,
+            keywordsFile = getKeywordsFile(type = type),
         )
 
-        model = SherpaOnnxKws(
+        kws = KeywordSpotter(
             assetManager = application.assets,
             config = config,
         )
+        stream = kws.createStream()
     }
-}
+}
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
+../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt
-// Copyright (c) 2023 Xiaomi Corporation
-package com.k2fsa.sherpa.onnx
-
-import android.content.res.AssetManager
-
-class WaveReader {
-    companion object {
-        // Read a mono wave file asset
-        // The returned array has two entries:
-        //  - the first entry contains an 1-D float array
-        //  - the second entry is the sample rate
-        external fun readWaveFromAsset(
-            assetManager: AssetManager,
-            filename: String,
-        ): Array<Any>
-
-        // Read a mono wave file from disk
-        // The returned array has two entries:
-        //  - the first entry contains an 1-D float array
-        //  - the second entry is the sample rate
-        external fun readWaveFromFile(
-            filename: String,
-        ): Array<Any>
-
-        init {
-            System.loadLibrary("sherpa-onnx-jni")
-        }
-    }
-}
 <resources>
-    <string name="app_name">KWS with Next-gen Kaldi</string>
+    <string name="app_name">Keyword spotting</string>
     <string name="hint">Click the Start button to play keyword spotting with Next-gen Kaldi.
 \n
 \n\n\n
 The source code and pre-trained models are publicly available.
 Please see https://github.com/k2-fsa/sherpa-onnx for details.
 </string>
-    <string name="keyword_hint">Input your keywords here, one keyword perline.</string>
+    <string name="keyword_hint">Input your keywords here, one keyword per line.\nTwo example keywords are given below:\n\nn ǐ h ǎo @你好\nd àn g ē d àn g ē @蛋哥蛋哥</string>
     <string name="start">Start</string>
     <string name="stop">Stop</string>
 </resources>
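The expanded keyword_hint string also documents the keyword format: the token sequence, then `@` plus the display form. Those user-entered lines are exactly what the updated MainActivity above hands to `kws.createStream(keywords)` after turning newlines into `/`; a short sketch of that round trip, reusing the examples from the hint (assuming `kws` and `stream` are already set up as in the diff):

    // Sketch: turn the user's keyword text into a fresh decoding stream,
    // mirroring the handler in the KWS MainActivity diff above.
    var keywords = "n ǐ h ǎo @你好\nd àn g ē d àn g ē @蛋哥蛋哥"
    keywords = keywords.replace("\n", "/").trim()
    stream.release()                     // drop the old stream first
    stream = kws.createStream(keywords)  // empty string => default keywords only
    if (stream.ptr == 0L) {
        Log.i(TAG, "Failed to create stream with keywords: $keywords")
    }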
@@ -2,7 +2,7 @@ package com.k2fsa.sherpa.onnx.speaker.identification
 
 import androidx.compose.ui.graphics.vector.ImageVector
 
-data class BarItem (
+data class BarItem(
     val title: String,
 
     // see https://www.composables.com/icons
 package com.k2fsa.sherpa.onnx.speaker.identification
 
 sealed class NavRoutes(val route: String) {
-    object Home: NavRoutes("home")
-    object Register: NavRoutes("register")
-    object View: NavRoutes("view")
-    object Help: NavRoutes("help")
+    object Home : NavRoutes("home")
+    object Register : NavRoutes("register")
+    object View : NavRoutes("view")
+    object Help : NavRoutes("help")
 }
-@file:OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class)
+@file:OptIn(ExperimentalMaterial3Api::class)
 
 package com.k2fsa.sherpa.onnx.slid
 
@@ -9,11 +9,9 @@ import android.media.AudioFormat
 import android.media.AudioRecord
 import android.media.MediaRecorder
 import android.util.Log
-import androidx.compose.foundation.ExperimentalFoundationApi
 import androidx.compose.foundation.layout.Box
 import androidx.compose.foundation.layout.Column
 import androidx.compose.foundation.layout.PaddingValues
-import androidx.compose.ui.Modifier
 import androidx.compose.foundation.layout.Spacer
 import androidx.compose.foundation.layout.fillMaxSize
 import androidx.compose.foundation.layout.height
@@ -31,6 +29,7 @@ import androidx.compose.runtime.mutableStateOf
 import androidx.compose.runtime.remember
 import androidx.compose.runtime.setValue
 import androidx.compose.ui.Alignment
+import androidx.compose.ui.Modifier
 import androidx.compose.ui.platform.LocalContext
 import androidx.compose.ui.text.font.FontWeight
 import androidx.compose.ui.unit.dp
@@ -63,13 +62,13 @@ fun Home() {
 }
 
 private var audioRecord: AudioRecord? = null
-private val sampleRateInHz = 16000
+private const val sampleRateInHz = 16000
 
 @Composable
 fun MyApp(padding: PaddingValues) {
     val activity = LocalContext.current as Activity
     var isStarted by remember { mutableStateOf(false) }
-    var result by remember { mutableStateOf<String>("") }
+    var result by remember { mutableStateOf("") }
 
     val onButtonClick: () -> Unit = {
         isStarted = !isStarted
@@ -114,12 +113,12 @@ fun MyApp(padding: PaddingValues) {
             }
             Log.i(TAG, "Stop recording")
             Log.i(TAG, "Start recognition")
-            val samples = Flatten(sampleList)
+            val samples = flatten(sampleList)
             val stream = Slid.slid.createStream()
             stream.acceptWaveform(samples, sampleRateInHz)
             val lang = Slid.slid.compute(stream)
 
-            result = Slid.localeMap.get(lang) ?: lang
+            result = Slid.localeMap[lang] ?: lang
 
             stream.release()
         }
@@ -152,7 +151,7 @@ fun MyApp(padding: PaddingValues) {
     }
 }
 
-fun Flatten(sampleList: ArrayList<FloatArray>): FloatArray {
+fun flatten(sampleList: ArrayList<FloatArray>): FloatArray {
     var totalSamples = 0
     for (a in sampleList) {
         totalSamples += a.size
@@ -10,12 +10,9 @@ import androidx.activity.compose.setContent
 import androidx.compose.foundation.layout.fillMaxSize
 import androidx.compose.material3.MaterialTheme
 import androidx.compose.material3.Surface
-import androidx.compose.material3.Text
 import androidx.compose.runtime.Composable
 import androidx.compose.ui.Modifier
-import androidx.compose.ui.tooling.preview.Preview
 import androidx.core.app.ActivityCompat
-import com.k2fsa.sherpa.onnx.SpokenLanguageIdentification
 import com.k2fsa.sherpa.onnx.slid.ui.theme.SherpaOnnxSpokenLanguageIdentificationTheme
 
 const val TAG = "sherpa-onnx"
@@ -32,6 +29,7 @@ class MainActivity : ComponentActivity() {
         ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION)
         Slid.initSlid(this.assets)
     }
+
     @Suppress("DEPRECATION")
     @Deprecated("Deprecated in Java")
     override fun onRequestPermissionsResult(
-../../../../../../../../../../SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/OfflineStream.kt
+../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
+../../../../../../../../../../../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
@@ -15,10 +15,10 @@ object Slid {
15 get() { 15 get() {
16 return _slid!! 16 return _slid!!
17 } 17 }
18 - val localeMap : Map<String, String>  
19 - get() {  
20 - return _localeMap  
21 - } 18 + val localeMap: Map<String, String>
  19 + get() {
  20 + return _localeMap
  21 + }
22 22
23 fun initSlid(assetManager: AssetManager? = null, numThreads: Int = 1) { 23 fun initSlid(assetManager: AssetManager? = null, numThreads: Int = 1) {
24 synchronized(this) { 24 synchronized(this) {
@@ -31,7 +31,7 @@ object Slid {
31 } 31 }
32 32
33 if (_localeMap.isEmpty()) { 33 if (_localeMap.isEmpty()) {
34 - val allLang = Locale.getISOLanguages(); 34 + val allLang = Locale.getISOLanguages()
35 for (lang in allLang) { 35 for (lang in allLang) {
36 val locale = Locale(lang) 36 val locale = Locale(lang)
37 _localeMap[lang] = locale.displayName 37 _localeMap[lang] = locale.displayName
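`initSlid` builds the `localeMap` consumed by the UI hunk earlier: every ISO 639 language code is mapped to its display name via `java.util.Locale`. A self-contained sketch of the same construction:

```kotlin
import java.util.Locale

// Self-contained sketch: map each ISO 639 language code ("de", "fr", ...)
// to its human-readable display name ("German", "French", ...).
fun buildLocaleMap(): Map<String, String> {
    val map = mutableMapOf<String, String>()
    for (lang in Locale.getISOLanguages()) {
        map[lang] = Locale(lang).displayName
    }
    return map
}
```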
1 package com.k2fsa.sherpa.onnx 1 package com.k2fsa.sherpa.onnx
2 2
3 import android.content.res.AssetManager 3 import android.content.res.AssetManager
4 -import android.media.* 4 +import android.media.AudioAttributes
  5 +import android.media.AudioFormat
  6 +import android.media.AudioManager
  7 +import android.media.AudioTrack
  8 +import android.media.MediaPlayer
5 import android.net.Uri 9 import android.net.Uri
6 import android.os.Bundle 10 import android.os.Bundle
7 import android.util.Log 11 import android.util.Log
@@ -212,7 +216,7 @@ class MainActivity : AppCompatActivity() {
212 } 216 }
213 217
214 if (dictDir != null) { 218 if (dictDir != null) {
215 - val newDir = copyDataDir( modelDir!!) 219 + val newDir = copyDataDir(modelDir!!)
216 modelDir = newDir + "/" + modelDir 220 modelDir = newDir + "/" + modelDir
217 dictDir = modelDir + "/" + "dict" 221 dictDir = modelDir + "/" + "dict"
218 ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" 222 ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
@@ -220,7 +224,9 @@ class MainActivity : AppCompatActivity() {
220 } 224 }
221 225
222 val config = getOfflineTtsConfig( 226 val config = getOfflineTtsConfig(
223 - modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", 227 + modelDir = modelDir!!,
  228 + modelName = modelName!!,
  229 + lexicon = lexicon ?: "",
224 dataDir = dataDir ?: "", 230 dataDir = dataDir ?: "",
225 dictDir = dictDir ?: "", 231 dictDir = dictDir ?: "",
226 ruleFsts = ruleFsts ?: "", 232 ruleFsts = ruleFsts ?: "",
@@ -232,11 +238,11 @@ class MainActivity : AppCompatActivity() {
232 238
233 239
234 private fun copyDataDir(dataDir: String): String { 240 private fun copyDataDir(dataDir: String): String {
235 - println("data dir is $dataDir") 241 + Log.i(TAG, "data dir is $dataDir")
236 copyAssets(dataDir) 242 copyAssets(dataDir)
237 243
238 val newDataDir = application.getExternalFilesDir(null)!!.absolutePath 244 val newDataDir = application.getExternalFilesDir(null)!!.absolutePath
239 - println("newDataDir: $newDataDir") 245 + Log.i(TAG, "newDataDir: $newDataDir")
240 return newDataDir 246 return newDataDir
241 } 247 }
242 248
@@ -256,7 +262,7 @@ class MainActivity : AppCompatActivity() {
256 } 262 }
257 } 263 }
258 } catch (ex: IOException) { 264 } catch (ex: IOException) {
259 - Log.e(TAG, "Failed to copy $path. ${ex.toString()}") 265 + Log.e(TAG, "Failed to copy $path. $ex")
260 } 266 }
261 } 267 }
262 268
@@ -276,7 +282,7 @@ class MainActivity : AppCompatActivity() {
276 ostream.flush() 282 ostream.flush()
277 ostream.close() 283 ostream.close()
278 } catch (ex: Exception) { 284 } catch (ex: Exception) {
279 - Log.e(TAG, "Failed to copy $filename, ${ex.toString()}") 285 + Log.e(TAG, "Failed to copy $filename, $ex")
280 } 286 }
281 } 287 }
282 } 288 }
@@ -49,10 +49,10 @@ class OfflineTts(
49 private var ptr: Long 49 private var ptr: Long
50 50
51 init { 51 init {
52 - if (assetManager != null) {  
53 - ptr = newFromAsset(assetManager, config) 52 + ptr = if (assetManager != null) {
  53 + newFromAsset(assetManager, config)
54 } else { 54 } else {
55 - ptr = newFromFile(config) 55 + newFromFile(config)
56 } 56 }
57 } 57 }
58 58
@@ -65,7 +65,7 @@ class OfflineTts(
65 sid: Int = 0, 65 sid: Int = 0,
66 speed: Float = 1.0f 66 speed: Float = 1.0f
67 ): GeneratedAudio { 67 ): GeneratedAudio {
68 - var objArray = generateImpl(ptr, text = text, sid = sid, speed = speed) 68 + val objArray = generateImpl(ptr, text = text, sid = sid, speed = speed)
69 return GeneratedAudio( 69 return GeneratedAudio(
70 samples = objArray[0] as FloatArray, 70 samples = objArray[0] as FloatArray,
71 sampleRate = objArray[1] as Int 71 sampleRate = objArray[1] as Int
@@ -78,7 +78,13 @@ class OfflineTts(
78 speed: Float = 1.0f, 78 speed: Float = 1.0f,
79 callback: (samples: FloatArray) -> Unit 79 callback: (samples: FloatArray) -> Unit
80 ): GeneratedAudio { 80 ): GeneratedAudio {
81 - var objArray = generateWithCallbackImpl(ptr, text = text, sid = sid, speed = speed, callback=callback) 81 + val objArray = generateWithCallbackImpl(
  82 + ptr,
  83 + text = text,
  84 + sid = sid,
  85 + speed = speed,
  86 + callback = callback
  87 + )
82 return GeneratedAudio( 88 return GeneratedAudio(
83 samples = objArray[0] as FloatArray, 89 samples = objArray[0] as FloatArray,
84 sampleRate = objArray[1] as Int 90 sampleRate = objArray[1] as Int
@@ -87,10 +93,10 @@ class OfflineTts(
87 93
88 fun allocate(assetManager: AssetManager? = null) { 94 fun allocate(assetManager: AssetManager? = null) {
89 if (ptr == 0L) { 95 if (ptr == 0L) {
90 - if (assetManager != null) {  
91 - ptr = newFromAsset(assetManager, config) 96 + ptr = if (assetManager != null) {
  97 + newFromAsset(assetManager, config)
92 } else { 98 } else {
93 - ptr = newFromFile(config) 99 + newFromFile(config)
94 } 100 }
95 } 101 }
96 } 102 }
@@ -103,9 +109,14 @@ class OfflineTts(
103 } 109 }
104 110
105 protected fun finalize() { 111 protected fun finalize() {
106 - delete(ptr) 112 + if (ptr != 0L) {
  113 + delete(ptr)
  114 + ptr = 0
  115 + }
107 } 116 }
108 117
  118 + fun release() = finalize()
  119 +
109 private external fun newFromAsset( 120 private external fun newFromAsset(
110 assetManager: AssetManager, 121 assetManager: AssetManager,
111 config: OfflineTtsConfig, 122 config: OfflineTtsConfig,
@@ -123,14 +134,14 @@ class OfflineTts(
123 // - the first entry is an 1-D float array containing audio samples. 134 // - the first entry is an 1-D float array containing audio samples.
124 // Each sample is normalized to the range [-1, 1] 135 // Each sample is normalized to the range [-1, 1]
125 // - the second entry is the sample rate 136 // - the second entry is the sample rate
126 - external fun generateImpl( 137 + private external fun generateImpl(
127 ptr: Long, 138 ptr: Long,
128 text: String, 139 text: String,
129 sid: Int = 0, 140 sid: Int = 0,
130 speed: Float = 1.0f 141 speed: Float = 1.0f
131 ): Array<Any> 142 ): Array<Any>
132 143
133 - external fun generateWithCallbackImpl( 144 + private external fun generateWithCallbackImpl(
134 ptr: Long, 145 ptr: Long,
135 text: String, 146 text: String,
136 sid: Int = 0, 147 sid: Int = 0,
@@ -156,7 +167,7 @@ fun getOfflineTtsConfig(
156 dictDir: String, 167 dictDir: String,
157 ruleFsts: String, 168 ruleFsts: String,
158 ruleFars: String 169 ruleFars: String
159 -): OfflineTtsConfig? { 170 +): OfflineTtsConfig {
160 return OfflineTtsConfig( 171 return OfflineTtsConfig(
161 model = OfflineTtsModelConfig( 172 model = OfflineTtsModelConfig(
162 vits = OfflineTtsVitsModelConfig( 173 vits = OfflineTtsVitsModelConfig(
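Three behavioral points stand out in this file beyond the cosmetic cleanups: `generateImpl`/`generateWithCallbackImpl` become `private external`, so callers must go through the typed wrappers; `finalize` now guards against double-freeing the native pointer and is exposed as `release()`; and `getOfflineTtsConfig` no longer returns a nullable config. A sketch of how a caller exercises the revised surface (`TAG` and the sample text are placeholders of ours):

```kotlin
import android.util.Log

private const val TAG = "sherpa-onnx"  // placeholder tag for this sketch

// Sketch: drive the revised OfflineTts API and release the native handle.
fun synthesizeAndRelease(tts: OfflineTts) {
    val audio = tts.generateWithCallback(
        text = "This is a text-to-speech engine using next generation Kaldi",
        sid = 0,
        speed = 1.0f,
    ) { samples ->
        // Invoked with chunks of samples in [-1, 1] as they are generated.
        Log.i(TAG, "got ${samples.size} samples")
    }
    Log.i(TAG, "sample rate: ${audio.sampleRate}")
    tts.release()  // delete() runs once; the guarded finalize() is then a no-op
}
```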
1 package com.k2fsa.sherpa.onnx.tts.engine 1 package com.k2fsa.sherpa.onnx.tts.engine
2 2
3 import android.content.Intent 3 import android.content.Intent
4 -import androidx.appcompat.app.AppCompatActivity  
5 import android.os.Bundle 4 import android.os.Bundle
6 import android.speech.tts.TextToSpeech 5 import android.speech.tts.TextToSpeech
  6 +import androidx.appcompat.app.AppCompatActivity
7 7
8 class CheckVoiceData : AppCompatActivity() { 8 class CheckVoiceData : AppCompatActivity() {
9 override fun onCreate(savedInstanceState: Bundle?) { 9 override fun onCreate(savedInstanceState: Bundle?) {
10 super.onCreate(savedInstanceState) 10 super.onCreate(savedInstanceState)
11 val intent = Intent().apply { 11 val intent = Intent().apply {
12 - putStringArrayListExtra(TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES, arrayListOf(TtsEngine.lang)) 12 + putStringArrayListExtra(
  13 + TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES,
  14 + arrayListOf(TtsEngine.lang)
  15 + )
13 putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, arrayListOf()) 16 putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, arrayListOf())
14 } 17 }
15 setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, intent) 18 setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, intent)
@@ -2,7 +2,6 @@ package com.k2fsa.sherpa.onnx.tts.engine
2 2
3 import android.app.Activity 3 import android.app.Activity
4 import android.content.Intent 4 import android.content.Intent
5 -import androidx.appcompat.app.AppCompatActivity  
6 import android.os.Bundle 5 import android.os.Bundle
7 import android.speech.tts.TextToSpeech 6 import android.speech.tts.TextToSpeech
8 7
@@ -12,120 +11,168 @@ fun getSampleText(lang: String): String {
12 "ara" -> { 11 "ara" -> {
13 text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي" 12 text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي"
14 } 13 }
  14 +
15 "ben" -> { 15 "ben" -> {
16 text = "এটি একটি টেক্সট-টু-স্পীচ ইঞ্জিন যা পরবর্তী প্রজন্মের কালডি ব্যবহার করে" 16 text = "এটি একটি টেক্সট-টু-স্পীচ ইঞ্জিন যা পরবর্তী প্রজন্মের কালডি ব্যবহার করে"
17 } 17 }
  18 +
18 "bul" -> { 19 "bul" -> {
19 - text = "Това е машина за преобразуване на текст в реч, използваща Kaldi от следващо поколение" 20 + text =
  21 + "Това е машина за преобразуване на текст в реч, използваща Kaldi от следващо поколение"
20 } 22 }
  23 +
21 "cat" -> { 24 "cat" -> {
22 text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació" 25 text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació"
23 } 26 }
  27 +
24 "ces" -> { 28 "ces" -> {
25 text = "Toto je převodník textu na řeč využívající novou generaci kaldi" 29 text = "Toto je převodník textu na řeč využívající novou generaci kaldi"
26 } 30 }
  31 +
27 "dan" -> { 32 "dan" -> {
28 text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi" 33 text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi"
29 } 34 }
  35 +
30 "deu" -> { 36 "deu" -> {
31 - text = "Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet" 37 + text =
  38 + "Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet"
32 } 39 }
  40 +
33 "ell" -> { 41 "ell" -> {
34 text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς" 42 text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς"
35 } 43 }
  44 +
36 "eng" -> { 45 "eng" -> {
37 text = "This is a text-to-speech engine using next generation Kaldi" 46 text = "This is a text-to-speech engine using next generation Kaldi"
38 } 47 }
  48 +
39 "est" -> { 49 "est" -> {
40 text = "See on teksti kõneks muutmise mootor, mis kasutab järgmise põlvkonna Kaldi" 50 text = "See on teksti kõneks muutmise mootor, mis kasutab järgmise põlvkonna Kaldi"
41 } 51 }
  52 +
42 "fin" -> { 53 "fin" -> {
43 text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia" 54 text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia"
44 } 55 }
  56 +
45 "fra" -> { 57 "fra" -> {
46 text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération" 58 text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération"
47 } 59 }
  60 +
48 "gle" -> { 61 "gle" -> {
49 text = "Is inneall téacs-go-hurlabhra é seo a úsáideann Kaldi den chéad ghlúin eile" 62 text = "Is inneall téacs-go-hurlabhra é seo a úsáideann Kaldi den chéad ghlúin eile"
50 } 63 }
  64 +
51 "hrv" -> { 65 "hrv" -> {
52 - text = "Ovo je mehanizam za pretvaranje teksta u govor koji koristi Kaldi sljedeće generacije" 66 + text =
  67 + "Ovo je mehanizam za pretvaranje teksta u govor koji koristi Kaldi sljedeće generacije"
53 } 68 }
  69 +
54 "hun" -> { 70 "hun" -> {
55 text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával" 71 text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával"
56 } 72 }
  73 +
57 "isl" -> { 74 "isl" -> {
58 text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi" 75 text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi"
59 } 76 }
  77 +
60 "ita" -> { 78 "ita" -> {
61 text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione" 79 text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione"
62 } 80 }
  81 +
63 "kat" -> { 82 "kat" -> {
64 text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით" 83 text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით"
65 } 84 }
  85 +
66 "kaz" -> { 86 "kaz" -> {
67 text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш" 87 text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш"
68 } 88 }
  89 +
69 "mlt" -> { 90 "mlt" -> {
70 text = "Din hija magna text-to-speech li tuża Kaldi tal-ġenerazzjoni li jmiss" 91 text = "Din hija magna text-to-speech li tuża Kaldi tal-ġenerazzjoni li jmiss"
71 } 92 }
  93 +
72 "lav" -> { 94 "lav" -> {
73 text = "Šis ir teksta pārvēršanas runā dzinējs, kas izmanto nākamās paaudzes Kaldi" 95 text = "Šis ir teksta pārvēršanas runā dzinējs, kas izmanto nākamās paaudzes Kaldi"
74 } 96 }
  97 +
75 "lit" -> { 98 "lit" -> {
76 text = "Tai teksto į kalbą variklis, kuriame naudojamas naujos kartos Kaldi" 99 text = "Tai teksto į kalbą variklis, kuriame naudojamas naujos kartos Kaldi"
77 } 100 }
  101 +
78 "ltz" -> { 102 "ltz" -> {
79 text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi" 103 text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi"
80 } 104 }
  105 +
81 "nep" -> { 106 "nep" -> {
82 text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो" 107 text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो"
83 } 108 }
  109 +
84 "nld" -> { 110 "nld" -> {
85 - text = "Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie" 111 + text =
  112 + "Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie"
86 } 113 }
  114 +
87 "nor" -> { 115 "nor" -> {
88 text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi" 116 text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi"
89 } 117 }
  118 +
90 "pol" -> { 119 "pol" -> {
91 text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji" 120 text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji"
92 } 121 }
  122 +
93 "por" -> { 123 "por" -> {
94 - text = "Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração" 124 + text =
  125 + "Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração"
95 } 126 }
  127 +
96 "ron" -> { 128 "ron" -> {
97 text = "Acesta este un motor text to speech care folosește generația următoare de kadi" 129 text = "Acesta este un motor text to speech care folosește generația următoare de kadi"
98 } 130 }
  131 +
99 "rus" -> { 132 "rus" -> {
100 - text = "Это движок преобразования текста в речь, использующий Kaldi следующего поколения." 133 + text =
  134 + "Это движок преобразования текста в речь, использующий Kaldi следующего поколения."
101 } 135 }
  136 +
102 "slk" -> { 137 "slk" -> {
103 text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie" 138 text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie"
104 } 139 }
  140 +
105 "slv" -> { 141 "slv" -> {
106 - text = "To je mehanizem za pretvorbo besedila v govor, ki uporablja Kaldi naslednje generacije" 142 + text =
  143 + "To je mehanizem za pretvorbo besedila v govor, ki uporablja Kaldi naslednje generacije"
107 } 144 }
  145 +
108 "spa" -> { 146 "spa" -> {
109 text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación." 147 text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación."
110 } 148 }
  149 +
111 "srp" -> { 150 "srp" -> {
112 - text = "Ово је механизам за претварање текста у говор који користи калди следеће генерације" 151 + text =
  152 + "Ово је механизам за претварање текста у говор који користи калди следеће генерације"
113 } 153 }
  154 +
114 "swa" -> { 155 "swa" -> {
115 text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi" 156 text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi"
116 } 157 }
  158 +
117 "swe" -> { 159 "swe" -> {
118 text = "Detta är en text till tal-motor som använder nästa generations kaldi" 160 text = "Detta är en text till tal-motor som använder nästa generations kaldi"
119 } 161 }
  162 +
120 "tur" -> { 163 "tur" -> {
121 text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur" 164 text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur"
122 } 165 }
  166 +
123 "ukr" -> { 167 "ukr" -> {
124 - text = "Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління" 168 + text =
  169 + "Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління"
125 } 170 }
  171 +
126 "vie" -> { 172 "vie" -> {
127 text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo" 173 text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo"
128 } 174 }
  175 +
129 "zho", "cmn" -> { 176 "zho", "cmn" -> {
130 text = "使用新一代卡尔迪的语音合成引擎" 177 text = "使用新一代卡尔迪的语音合成引擎"
131 } 178 }
@@ -137,13 +184,13 @@ class GetSampleText : Activity() {
137 override fun onCreate(savedInstanceState: Bundle?) { 184 override fun onCreate(savedInstanceState: Bundle?) {
138 super.onCreate(savedInstanceState) 185 super.onCreate(savedInstanceState)
139 var result = TextToSpeech.LANG_AVAILABLE 186 var result = TextToSpeech.LANG_AVAILABLE
140 - var text: String = getSampleText(TtsEngine.lang ?: "") 187 + val text: String = getSampleText(TtsEngine.lang ?: "")
141 if (text.isEmpty()) { 188 if (text.isEmpty()) {
142 result = TextToSpeech.LANG_NOT_SUPPORTED 189 result = TextToSpeech.LANG_NOT_SUPPORTED
143 } 190 }
144 191
145 - val intent = Intent().apply{  
146 - if(result == TextToSpeech.LANG_AVAILABLE) { 192 + val intent = Intent().apply {
  193 + if (result == TextToSpeech.LANG_AVAILABLE) {
147 putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text) 194 putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text)
148 } else { 195 } else {
149 putExtra("sampleText", text) 196 putExtra("sampleText", text)
@@ -26,20 +26,16 @@ import androidx.compose.material3.Scaffold
26 import androidx.compose.material3.Slider 26 import androidx.compose.material3.Slider
27 import androidx.compose.material3.Surface 27 import androidx.compose.material3.Surface
28 import androidx.compose.material3.Text 28 import androidx.compose.material3.Text
29 -import androidx.compose.material3.TextField  
30 import androidx.compose.material3.TopAppBar 29 import androidx.compose.material3.TopAppBar
31 -import androidx.compose.runtime.Composable  
32 import androidx.compose.runtime.getValue 30 import androidx.compose.runtime.getValue
33 import androidx.compose.runtime.mutableStateOf 31 import androidx.compose.runtime.mutableStateOf
34 import androidx.compose.runtime.remember 32 import androidx.compose.runtime.remember
35 import androidx.compose.runtime.setValue 33 import androidx.compose.runtime.setValue
36 import androidx.compose.ui.Modifier 34 import androidx.compose.ui.Modifier
37 import androidx.compose.ui.text.input.KeyboardType 35 import androidx.compose.ui.text.input.KeyboardType
38 -import androidx.compose.ui.tooling.preview.Preview  
39 import androidx.compose.ui.unit.dp 36 import androidx.compose.ui.unit.dp
40 import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme 37 import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme
41 import java.io.File 38 import java.io.File
42 -import java.lang.NumberFormatException  
43 39
44 const val TAG = "sherpa-onnx-tts-engine" 40 const val TAG = "sherpa-onnx-tts-engine"
45 41
@@ -76,7 +72,7 @@ class MainActivity : ComponentActivity() {
76 val testTextContent = getSampleText(TtsEngine.lang ?: "") 72 val testTextContent = getSampleText(TtsEngine.lang ?: "")
77 73
78 var testText by remember { mutableStateOf(testTextContent) } 74 var testText by remember { mutableStateOf(testTextContent) }
79 - 75 +
80 val numSpeakers = TtsEngine.tts!!.numSpeakers() 76 val numSpeakers = TtsEngine.tts!!.numSpeakers()
81 if (numSpeakers > 1) { 77 if (numSpeakers > 1) {
82 OutlinedTextField( 78 OutlinedTextField(
@@ -88,7 +84,7 @@ class MainActivity : ComponentActivity() {
88 try { 84 try {
89 TtsEngine.speakerId = it.toString().toInt() 85 TtsEngine.speakerId = it.toString().toInt()
90 } catch (ex: NumberFormatException) { 86 } catch (ex: NumberFormatException) {
91 - Log.i(TAG, "Invalid input: ${it}") 87 + Log.i(TAG, "Invalid input: $it")
92 TtsEngine.speakerId = 0 88 TtsEngine.speakerId = 0
93 } 89 }
94 } 90 }
@@ -119,7 +115,7 @@ class MainActivity : ComponentActivity() {
119 Button( 115 Button(
120 modifier = Modifier.padding(20.dp), 116 modifier = Modifier.padding(20.dp),
121 onClick = { 117 onClick = {
122 - Log.i(TAG, "Clicked, text: ${testText}") 118 + Log.i(TAG, "Clicked, text: $testText")
123 if (testText.isBlank() || testText.isEmpty()) { 119 if (testText.isBlank() || testText.isEmpty()) {
124 Toast.makeText( 120 Toast.makeText(
125 applicationContext, 121 applicationContext,
@@ -136,7 +132,7 @@ class MainActivity : ComponentActivity() {
136 val filename = 132 val filename =
137 application.filesDir.absolutePath + "/generated.wav" 133 application.filesDir.absolutePath + "/generated.wav"
138 val ok = 134 val ok =
139 - audio.samples.size > 0 && audio.save(filename) 135 + audio.samples.isNotEmpty() && audio.save(filename)
140 136
141 if (ok) { 137 if (ok) {
142 stopMediaPlayer() 138 stopMediaPlayer()
@@ -4,8 +4,10 @@ import android.content.Context
4 import android.content.res.AssetManager 4 import android.content.res.AssetManager
5 import android.util.Log 5 import android.util.Log
6 import androidx.compose.runtime.MutableState 6 import androidx.compose.runtime.MutableState
7 -import androidx.compose.runtime.mutableStateOf  
8 -import com.k2fsa.sherpa.onnx.* 7 +import androidx.compose.runtime.mutableFloatStateOf
  8 +import androidx.compose.runtime.mutableIntStateOf
  9 +import com.k2fsa.sherpa.onnx.OfflineTts
  10 +import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
9 import java.io.File 11 import java.io.File
10 import java.io.FileOutputStream 12 import java.io.FileOutputStream
11 import java.io.IOException 13 import java.io.IOException
@@ -21,8 +23,8 @@ object TtsEngine {
21 var lang: String? = null 23 var lang: String? = null
22 24
23 25
24 - val speedState: MutableState<Float> = mutableStateOf(1.0F)  
25 - val speakerIdState: MutableState<Int> = mutableStateOf(0) 26 + val speedState: MutableState<Float> = mutableFloatStateOf(1.0F)
  27 + val speakerIdState: MutableState<Int> = mutableIntStateOf(0)
26 28
27 var speed: Float 29 var speed: Float
28 get() = speedState.value 30 get() = speedState.value
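`mutableFloatStateOf`/`mutableIntStateOf` are the primitive-specialized Compose state holders: they behave like `mutableStateOf` but avoid boxing a `Float`/`Int` on every read and write. The property pattern the hunk truncates presumably continues as in this sketch (the setter is our assumption; imports are as in the hunk above):

```kotlin
// Primitive-specialized state avoids autoboxing on each read/write.
val speedState: MutableState<Float> = mutableFloatStateOf(1.0f)

var speed: Float
    get() = speedState.value
    set(value) { speedState.value = value }  // assumed setter, not shown in the hunk
```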
@@ -113,15 +115,15 @@ object TtsEngine {
113 115
114 if (dataDir != null) { 116 if (dataDir != null) {
115 val newDir = copyDataDir(context, modelDir!!) 117 val newDir = copyDataDir(context, modelDir!!)
116 - modelDir = newDir + "/" + modelDir  
117 - dataDir = newDir + "/" + dataDir 118 + modelDir = "$newDir/$modelDir"
  119 + dataDir = "$newDir/$dataDir"
118 assets = null 120 assets = null
119 } 121 }
120 122
121 if (dictDir != null) { 123 if (dictDir != null) {
122 val newDir = copyDataDir(context, modelDir!!) 124 val newDir = copyDataDir(context, modelDir!!)
123 - modelDir = newDir + "/" + modelDir  
124 - dictDir = modelDir + "/" + "dict" 125 + modelDir = "$newDir/$modelDir"
  126 + dictDir = "$modelDir/dict"
125 ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" 127 ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
126 assets = null 128 assets = null
127 } 129 }
@@ -132,18 +134,18 @@ object TtsEngine {
132 dictDir = dictDir ?: "", 134 dictDir = dictDir ?: "",
133 ruleFsts = ruleFsts ?: "", 135 ruleFsts = ruleFsts ?: "",
134 ruleFars = ruleFars ?: "" 136 ruleFars = ruleFars ?: ""
135 - )!! 137 + )
136 138
137 tts = OfflineTts(assetManager = assets, config = config) 139 tts = OfflineTts(assetManager = assets, config = config)
138 } 140 }
139 141
140 142
141 private fun copyDataDir(context: Context, dataDir: String): String { 143 private fun copyDataDir(context: Context, dataDir: String): String {
142 - println("data dir is $dataDir") 144 + Log.i(TAG, "data dir is $dataDir")
143 copyAssets(context, dataDir) 145 copyAssets(context, dataDir)
144 146
145 val newDataDir = context.getExternalFilesDir(null)!!.absolutePath 147 val newDataDir = context.getExternalFilesDir(null)!!.absolutePath
146 - println("newDataDir: $newDataDir") 148 + Log.i(TAG, "newDataDir: $newDataDir")
147 return newDataDir 149 return newDataDir
148 } 150 }
149 151
@@ -158,12 +160,12 @@ object TtsEngine {
158 val dir = File(fullPath) 160 val dir = File(fullPath)
159 dir.mkdirs() 161 dir.mkdirs()
160 for (asset in assets.iterator()) { 162 for (asset in assets.iterator()) {
161 - val p: String = if (path == "") "" else path + "/" 163 + val p: String = if (path == "") "" else "$path/"
162 copyAssets(context, p + asset) 164 copyAssets(context, p + asset)
163 } 165 }
164 } 166 }
165 } catch (ex: IOException) { 167 } catch (ex: IOException) {
166 - Log.e(TAG, "Failed to copy $path. ${ex.toString()}") 168 + Log.e(TAG, "Failed to copy $path. $ex")
167 } 169 }
168 } 170 }
169 171
@@ -183,7 +185,7 @@ object TtsEngine {
183 ostream.flush() 185 ostream.flush()
184 ostream.close() 186 ostream.close()
185 } catch (ex: Exception) { 187 } catch (ex: Exception) {
186 - Log.e(TAG, "Failed to copy $filename, ${ex.toString()}") 188 + Log.e(TAG, "Failed to copy $filename, $ex")
187 } 189 }
188 } 190 }
189 } 191 }
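The common theme of this file's changes: when a model ships a `dataDir` or `dictDir`, the assets are copied out of the APK into the app's external files directory, the paths are rewritten with string templates, and `assets` is nulled so `OfflineTts` loads from the filesystem instead of the `AssetManager`. A condensed sketch of that decision (the function and parameter names here are ours):

```kotlin
import android.content.Context
import android.content.res.AssetManager

// Condensed sketch: choose between APK assets and on-disk copies.
// Returns the resolved model directory and the AssetManager to use
// (null means "load from the filesystem").
fun resolveModelDir(
    context: Context,
    modelDir: String,
    needsDiskCopy: Boolean,
): Pair<String, AssetManager?> {
    if (!needsDiskCopy) return modelDir to context.assets
    val base = context.getExternalFilesDir(null)!!.absolutePath
    return "$base/$modelDir" to null  // copyAssets() has placed the files here
}
```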
@@ -6,7 +6,6 @@ import android.speech.tts.SynthesisRequest
6 import android.speech.tts.TextToSpeech 6 import android.speech.tts.TextToSpeech
7 import android.speech.tts.TextToSpeechService 7 import android.speech.tts.TextToSpeechService
8 import android.util.Log 8 import android.util.Log
9 -import com.k2fsa.sherpa.onnx.*  
10 9
11 /* 10 /*
12 https://developer.android.com/reference/java/util/Locale#getISO3Language() 11 https://developer.android.com/reference/java/util/Locale#getISO3Language()
1 package com.k2fsa.sherpa.onnx.tts.engine 1 package com.k2fsa.sherpa.onnx.tts.engine
2 2
3 import android.app.Application 3 import android.app.Application
4 -import android.os.FileUtils.ProgressListener  
5 import android.speech.tts.TextToSpeech 4 import android.speech.tts.TextToSpeech
6 import android.speech.tts.TextToSpeech.OnInitListener 5 import android.speech.tts.TextToSpeech.OnInitListener
7 import android.speech.tts.UtteranceProgressListener 6 import android.speech.tts.UtteranceProgressListener
@@ -27,7 +26,7 @@ class TtsViewModel : ViewModel() {
27 private val onInitListener = object : OnInitListener { 26 private val onInitListener = object : OnInitListener {
28 override fun onInit(status: Int) { 27 override fun onInit(status: Int) {
29 when (status) { 28 when (status) {
30 - TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeded") 29 + TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeeded")
31 TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed") 30 TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed")
32 else -> Log.i(TAG, "Unknown status $status") 31 else -> Log.i(TAG, "Unknown status $status")
33 } 32 }
@@ -15,7 +15,7 @@
15 android:theme="@style/Theme.SherpaOnnxVad" 15 android:theme="@style/Theme.SherpaOnnxVad"
16 tools:targetApi="31"> 16 tools:targetApi="31">
17 <activity 17 <activity
18 - android:name=".MainActivity" 18 + android:name="com.k2fsa.sherpa.onnx.vad.MainActivity"
19 android:exported="true"> 19 android:exported="true">
20 <intent-filter> 20 <intent-filter>
21 <action android:name="android.intent.action.MAIN" /> 21 <action android:name="android.intent.action.MAIN" />
1 -package com.k2fsa.sherpa.onnx 1 +package com.k2fsa.sherpa.onnx.vad
2 2
3 import android.Manifest 3 import android.Manifest
4 import android.content.pm.PackageManager 4 import android.content.pm.PackageManager
@@ -11,6 +11,9 @@ import android.view.View
11 import android.widget.Button 11 import android.widget.Button
12 import androidx.appcompat.app.AppCompatActivity 12 import androidx.appcompat.app.AppCompatActivity
13 import androidx.core.app.ActivityCompat 13 import androidx.core.app.ActivityCompat
  14 +import com.k2fsa.sherpa.onnx.R
  15 +import com.k2fsa.sherpa.onnx.Vad
  16 +import com.k2fsa.sherpa.onnx.getVadModelConfig
14 import kotlin.concurrent.thread 17 import kotlin.concurrent.thread
15 18
16 19
@@ -116,7 +119,7 @@ class MainActivity : AppCompatActivity() {
116 119
117 private fun initVadModel() { 120 private fun initVadModel() {
118 val type = 0 121 val type = 0
119 - println("Select VAD model type ${type}") 122 + Log.i(TAG, "Select VAD model type ${type}")
120 val config = getVadModelConfig(type) 123 val config = getVadModelConfig(type)
121 124
122 vad = Vad( 125 vad = Vad(
@@ -171,4 +174,4 @@ class MainActivity : AppCompatActivity() {
171 } 174 }
172 } 175 }
173 } 176 }
174 -}  
  177 +}
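This app moves into the `com.k2fsa.sherpa.onnx.vad` package and now imports `Vad` and `getVadModelConfig` from the shared `kotlin-api` directory. The truncated `initVadModel` presumably completes along these lines (the constructor arguments are an assumption, mirroring the other refactored classes):

```kotlin
import android.content.res.AssetManager
import android.util.Log
import com.k2fsa.sherpa.onnx.Vad
import com.k2fsa.sherpa.onnx.getVadModelConfig

private const val TAG = "sherpa-onnx"  // placeholder tag for this sketch

// Sketch: build a Vad from one of the bundled model configs.
fun initVad(assets: AssetManager): Vad {
    val type = 0  // index of the bundled VAD model config
    Log.i(TAG, "Select VAD model type $type")
    val config = getVadModelConfig(type)!!  // assumed nullable, like getOfflineModelConfig
    return Vad(assetManager = assets, config = config)
}
```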
  1 +../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt
@@ -4,7 +4,7 @@
4 xmlns:tools="http://schemas.android.com/tools" 4 xmlns:tools="http://schemas.android.com/tools"
5 android:layout_width="match_parent" 5 android:layout_width="match_parent"
6 android:layout_height="match_parent" 6 android:layout_height="match_parent"
7 - tools:context=".MainActivity"> 7 + tools:context="com.k2fsa.sherpa.onnx.vad.MainActivity">
8 <LinearLayout 8 <LinearLayout
9 android:layout_width="match_parent" 9 android:layout_width="match_parent"
10 android:layout_height="match_parent" 10 android:layout_height="match_parent"
@@ -40,4 +40,4 @@
40 40
41 41
42 42
43 -</androidx.constraintlayout.widget.ConstraintLayout>  
  43 +</androidx.constraintlayout.widget.ConstraintLayout>
@@ -15,7 +15,7 @@
15 android:theme="@style/Theme.SherpaOnnxVadAsr" 15 android:theme="@style/Theme.SherpaOnnxVadAsr"
16 tools:targetApi="31"> 16 tools:targetApi="31">
17 <activity 17 <activity
18 - android:name=".MainActivity" 18 + android:name=".vad.asr.MainActivity"
19 android:exported="true"> 19 android:exported="true">
20 <intent-filter> 20 <intent-filter>
21 <action android:name="android.intent.action.MAIN" /> 21 <action android:name="android.intent.action.MAIN" />
  1 +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt
1 -package com.k2fsa.sherpa.onnx 1 +package com.k2fsa.sherpa.onnx.vad.asr
2 2
3 import android.Manifest 3 import android.Manifest
4 import android.content.pm.PackageManager 4 import android.content.pm.PackageManager
@@ -13,6 +13,13 @@ import android.widget.Button
13 import android.widget.TextView 13 import android.widget.TextView
14 import androidx.appcompat.app.AppCompatActivity 14 import androidx.appcompat.app.AppCompatActivity
15 import androidx.core.app.ActivityCompat 15 import androidx.core.app.ActivityCompat
  16 +import com.k2fsa.sherpa.onnx.OfflineRecognizer
  17 +import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig
  18 +import com.k2fsa.sherpa.onnx.R
  19 +import com.k2fsa.sherpa.onnx.Vad
  20 +import com.k2fsa.sherpa.onnx.getFeatureConfig
  21 +import com.k2fsa.sherpa.onnx.getOfflineModelConfig
  22 +import com.k2fsa.sherpa.onnx.getVadModelConfig
16 import kotlin.concurrent.thread 23 import kotlin.concurrent.thread
17 24
18 25
@@ -40,7 +47,7 @@ class MainActivity : AppCompatActivity() {
40 private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) 47 private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO)
41 48
42 // Non-streaming ASR 49 // Non-streaming ASR
43 - private lateinit var offlineRecognizer: SherpaOnnxOffline 50 + private lateinit var offlineRecognizer: OfflineRecognizer
44 51
45 private var idx: Int = 0 52 private var idx: Int = 0
46 private var lastText: String = "" 53 private var lastText: String = ""
@@ -122,7 +129,7 @@ class MainActivity : AppCompatActivity() {
122 129
123 private fun initVadModel() { 130 private fun initVadModel() {
124 val type = 0 131 val type = 0
125 - println("Select VAD model type ${type}") 132 + Log.i(TAG, "Select VAD model type ${type}")
126 val config = getVadModelConfig(type) 133 val config = getVadModelConfig(type)
127 134
128 vad = Vad( 135 vad = Vad(
@@ -194,20 +201,25 @@ class MainActivity : AppCompatActivity() {
194 // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html 201 // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
195 // for a list of available models 202 // for a list of available models
196 val secondType = 0 203 val secondType = 0
197 - println("Select model type ${secondType} for the second pass") 204 + Log.i(TAG, "Select model type ${secondType} for the second pass")
198 205
199 val config = OfflineRecognizerConfig( 206 val config = OfflineRecognizerConfig(
200 featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), 207 featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
201 modelConfig = getOfflineModelConfig(type = secondType)!!, 208 modelConfig = getOfflineModelConfig(type = secondType)!!,
202 ) 209 )
203 210
204 - offlineRecognizer = SherpaOnnxOffline( 211 + offlineRecognizer = OfflineRecognizer(
205 assetManager = application.assets, 212 assetManager = application.assets,
206 config = config, 213 config = config,
207 ) 214 )
208 } 215 }
209 216
210 private fun runSecondPass(samples: FloatArray): String { 217 private fun runSecondPass(samples: FloatArray): String {
211 - return offlineRecognizer.decode(samples, sampleRateInHz) 218 + val stream = offlineRecognizer.createStream()
  219 + stream.acceptWaveform(samples, sampleRateInHz)
  220 + offlineRecognizer.decode(stream)
  221 + val result = offlineRecognizer.getResult(stream)
  222 + stream.release()
  223 + return result.text
212 } 224 }
213 -}  
  225 +}
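The second pass now follows the same explicit lifecycle as the other refactored APIs: stream in, decode, read result, release. A standalone sketch of the flow shown in the hunk above (recognizer construction elided):

```kotlin
// Sketch of the refactored second-pass decode using only the calls
// visible in the hunk above.
fun decodeOnce(recognizer: OfflineRecognizer, samples: FloatArray, sampleRateInHz: Int): String {
    val stream = recognizer.createStream()
    stream.acceptWaveform(samples, sampleRateInHz)
    recognizer.decode(stream)                     // run the non-streaming model
    val text = recognizer.getResult(stream).text  // result carries the transcript
    stream.release()                              // always free the native stream
    return text
}
```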
  1 +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
  1 +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt
1 -../../../../../../../../../SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt  
1 -../../../../../../../../../SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt  
  1 +../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt
@@ -4,7 +4,7 @@
4 xmlns:tools="http://schemas.android.com/tools" 4 xmlns:tools="http://schemas.android.com/tools"
5 android:layout_width="match_parent" 5 android:layout_width="match_parent"
6 android:layout_height="match_parent" 6 android:layout_height="match_parent"
7 - tools:context=".MainActivity"> 7 + tools:context=".vad.asr.MainActivity">
8 8
9 <LinearLayout 9 <LinearLayout
10 android:layout_width="match_parent" 10 android:layout_width="match_parent"
1 <resources> 1 <resources>
2 - <string name="app_name">VAD-ASR</string> 2 + <string name="app_name">VAD+ASR</string>
3 <string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi. 3 <string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi.
4 \n 4 \n
5 \n\n\n 5 \n\n\n
@@ -59,7 +59,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
59 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" 59 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
60 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" 60 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
61 61
  62 +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
  63 + SHERPA_ONNX_ENABLE_TTS=ON
  64 +fi
  65 +
  66 +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
  67 + SHERPA_ONNX_ENABLE_BINARY=OFF
  68 +fi
  69 +
62 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ 70 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
  71 + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
  72 + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
63 -DBUILD_PIPER_PHONMIZE_EXE=OFF \ 73 -DBUILD_PIPER_PHONMIZE_EXE=OFF \
64 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ 74 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
65 -DBUILD_ESPEAK_NG_EXE=OFF \ 75 -DBUILD_ESPEAK_NG_EXE=OFF \
@@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
60 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" 60 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
61 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" 61 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
62 62
  63 +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
  64 + SHERPA_ONNX_ENABLE_TTS=ON
  65 +fi
  66 +
  67 +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
  68 + SHERPA_ONNX_ENABLE_BINARY=OFF
  69 +fi
  70 +
63 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ 71 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
  72 + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
  73 + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
64 -DBUILD_PIPER_PHONMIZE_EXE=OFF \ 74 -DBUILD_PIPER_PHONMIZE_EXE=OFF \
65 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ 75 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
66 -DBUILD_ESPEAK_NG_EXE=OFF \ 76 -DBUILD_ESPEAK_NG_EXE=OFF \
@@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
60 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" 60 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
61 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" 61 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
62 62
  63 +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
  64 + SHERPA_ONNX_ENABLE_TTS=ON
  65 +fi
  66 +
  67 +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
  68 + SHERPA_ONNX_ENABLE_BINARY=OFF
  69 +fi
  70 +
63 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ 71 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
  72 + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
  73 + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
64 -DBUILD_PIPER_PHONMIZE_EXE=OFF \ 74 -DBUILD_PIPER_PHONMIZE_EXE=OFF \
65 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ 75 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
66 -DBUILD_ESPEAK_NG_EXE=OFF \ 76 -DBUILD_ESPEAK_NG_EXE=OFF \
@@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/
60 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" 60 echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
61 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" 61 echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
62 62
  63 +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then
  64 + SHERPA_ONNX_ENABLE_TTS=ON
  65 +fi
  66 +
  67 +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then
  68 + SHERPA_ONNX_ENABLE_BINARY=OFF
  69 +fi
  70 +
63 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ 71 cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
  72 + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \
  73 + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \
64 -DBUILD_PIPER_PHONMIZE_EXE=OFF \ 74 -DBUILD_PIPER_PHONMIZE_EXE=OFF \
65 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ 75 -DBUILD_PIPER_PHONMIZE_TESTS=OFF \
66 -DBUILD_ESPEAK_NG_EXE=OFF \ 76 -DBUILD_ESPEAK_NG_EXE=OFF \
1 -../android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt  
  1 +../sherpa-onnx/kotlin-api/AudioTagging.kt
  1 +../sherpa-onnx/kotlin-api/FeatureConfig.kt
1 -package com.k2fsa.sherpa.onnx  
2 -  
3 -import android.content.res.AssetManager  
4 -  
5 -fun callback(samples: FloatArray): Unit {  
6 - println("callback got called with ${samples.size} samples");  
7 -}  
8 -  
9 -fun main() {  
10 - testSpokenLanguageIdentifcation()  
11 - testAudioTagging()  
12 - testSpeakerRecognition()  
13 - testTts()  
14 - testAsr("transducer")  
15 - testAsr("zipformer2-ctc")  
16 -}  
17 -  
18 -fun testSpokenLanguageIdentifcation() {  
19 - val config = SpokenLanguageIdentificationConfig(  
20 - whisper = SpokenLanguageIdentificationWhisperConfig(  
21 - encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",  
22 - decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",  
23 - tailPaddings = 33,  
24 - ),  
25 - numThreads=1,  
26 - debug=true,  
27 - provider="cpu",  
28 - )  
29 - val slid = SpokenLanguageIdentification(assetManager=null, config=config)  
30 -  
31 - val testFiles = arrayOf(  
32 - "./spoken-language-identification-test-wavs/ar-arabic.wav",  
33 - "./spoken-language-identification-test-wavs/bg-bulgarian.wav",  
34 - "./spoken-language-identification-test-wavs/de-german.wav",  
35 - )  
36 -  
37 - for (waveFilename in testFiles) {  
38 - val objArray = WaveReader.readWaveFromFile(  
39 - filename = waveFilename,  
40 - )  
41 - val samples: FloatArray = objArray[0] as FloatArray  
42 - val sampleRate: Int = objArray[1] as Int  
43 -  
44 - val stream = slid.createStream()  
45 - stream.acceptWaveform(samples, sampleRate = sampleRate)  
46 - val lang = slid.compute(stream)  
47 - stream.release()  
48 - println(waveFilename)  
49 - println(lang)  
50 - }  
51 -}  
52 -  
53 -fun testAudioTagging() {  
54 - val config = AudioTaggingConfig(  
55 - model=AudioTaggingModelConfig(  
56 - zipformer=OfflineZipformerAudioTaggingModelConfig(  
57 - model="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx",  
58 - ),  
59 - numThreads=1,  
60 - debug=true,  
61 - provider="cpu",  
62 - ),  
63 - labels="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv",  
64 - topK=5,  
65 - )  
66 - val tagger = AudioTagging(assetManager=null, config=config)  
67 -  
68 - val testFiles = arrayOf(  
69 - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav",  
70 - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/2.wav",  
71 - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/3.wav",  
72 - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/4.wav",  
73 - )  
74 - println("----------")  
75 - for (waveFilename in testFiles) {  
76 - val stream = tagger.createStream()  
77 -  
78 - val objArray = WaveReader.readWaveFromFile(  
79 - filename = waveFilename,  
80 - )  
81 - val samples: FloatArray = objArray[0] as FloatArray  
82 - val sampleRate: Int = objArray[1] as Int  
83 -  
84 - stream.acceptWaveform(samples, sampleRate = sampleRate)  
85 - val events = tagger.compute(stream)  
86 - stream.release()  
87 -  
88 - println(waveFilename)  
89 - println(events)  
90 - println("----------")  
91 - }  
92 -  
93 - tagger.release()  
94 -}  
95 -  
96 -fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {  
97 - var objArray = WaveReader.readWaveFromFile(  
98 - filename = filename,  
99 - )  
100 - var samples: FloatArray = objArray[0] as FloatArray  
101 - var sampleRate: Int = objArray[1] as Int  
102 -  
103 - val stream = extractor.createStream()  
104 - stream.acceptWaveform(sampleRate = sampleRate, samples=samples)  
105 - stream.inputFinished()  
106 - check(extractor.isReady(stream))  
107 -  
108 - val embedding = extractor.compute(stream)  
109 -  
110 - stream.release()  
111 -  
112 - return embedding  
113 -}  
114 -  
115 -fun testSpeakerRecognition() {  
116 - val config = SpeakerEmbeddingExtractorConfig(  
117 - model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",  
118 - )  
119 - val extractor = SpeakerEmbeddingExtractor(config = config)  
120 -  
121 - val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")  
122 - val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")  
123 - val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")  
124 -  
125 - var manager = SpeakerEmbeddingManager(extractor.dim())  
126 - var ok = manager.add(name = "speaker1", embedding=embedding1a)  
127 - check(ok)  
128 -  
129 - manager.add(name = "speaker2", embedding=embedding2a)  
130 - check(ok)  
131 -  
132 - var name = manager.search(embedding=embedding1b, threshold=0.5f)  
133 - check(name == "speaker1")  
134 -  
135 - manager.release()  
136 -  
137 - manager = SpeakerEmbeddingManager(extractor.dim())  
138 - val embeddingList = mutableListOf(embedding1a, embedding1b)  
139 - ok = manager.add(name = "s1", embedding=embeddingList.toTypedArray())  
140 - check(ok)  
141 -  
142 - name = manager.search(embedding=embedding1b, threshold=0.5f)  
143 - check(name == "s1")  
144 -  
145 - name = manager.search(embedding=embedding2a, threshold=0.5f)  
146 - check(name.length == 0)  
147 -  
148 - manager.release()  
149 -}  
150 -  
151 -fun testTts() {  
152 - // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models  
153 - // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2  
154 - var config = OfflineTtsConfig(  
155 - model=OfflineTtsModelConfig(  
156 - vits=OfflineTtsVitsModelConfig(  
157 - model="./vits-piper-en_US-amy-low/en_US-amy-low.onnx",  
158 - tokens="./vits-piper-en_US-amy-low/tokens.txt",  
159 - dataDir="./vits-piper-en_US-amy-low/espeak-ng-data",  
160 - ),  
161 - numThreads=1,  
162 - debug=true,  
163 - )  
164 - )  
165 - val tts = OfflineTts(config=config)  
166 - val audio = tts.generateWithCallback(text="“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”", callback=::callback)  
167 - audio.save(filename="test-en.wav")  
168 -}  
169 -  
170 -fun testAsr(type: String) {  
171 - var featConfig = FeatureConfig(  
172 - sampleRate = 16000,  
173 - featureDim = 80,  
174 - )  
175 -  
176 - var waveFilename: String  
177 - var modelConfig: OnlineModelConfig = when (type) {  
178 - "transducer" -> {  
179 - waveFilename = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/test_wavs/0.wav"  
180 - // please refer to  
181 - // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html  
182 - // to dowload pre-trained models  
183 - OnlineModelConfig(  
184 - transducer = OnlineTransducerModelConfig(  
185 - encoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/encoder-epoch-99-avg-1.onnx",  
186 - decoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/decoder-epoch-99-avg-1.onnx",  
187 - joiner = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/joiner-epoch-99-avg-1.onnx",  
188 - ),  
189 - tokens = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt",  
190 - numThreads = 1,  
191 - debug = false,  
192 - )  
193 - }  
194 - "zipformer2-ctc" -> {  
195 - waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav"  
196 - OnlineModelConfig(  
197 - zipformer2Ctc = OnlineZipformer2CtcModelConfig(  
198 - model = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx",  
199 - ),  
200 - tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt",  
201 - numThreads = 1,  
202 - debug = false,  
203 - )  
204 - }  
205 - else -> throw IllegalArgumentException(type)  
206 - }  
207 -  
208 - var endpointConfig = EndpointConfig()  
209 -  
210 - var lmConfig = OnlineLMConfig()  
211 -  
212 - var config = OnlineRecognizerConfig(  
213 - modelConfig = modelConfig,  
214 - lmConfig = lmConfig,  
215 - featConfig = featConfig,  
216 - endpointConfig = endpointConfig,  
217 - enableEndpoint = true,  
218 - decodingMethod = "greedy_search",  
219 - maxActivePaths = 4,  
220 - )  
221 -  
222 - var model = SherpaOnnx(  
223 - config = config,  
224 - )  
225 -  
226 - var objArray = WaveReader.readWaveFromFile(  
227 - filename = waveFilename,  
228 - )  
229 - var samples: FloatArray = objArray[0] as FloatArray  
230 - var sampleRate: Int = objArray[1] as Int  
231 -  
232 - model.acceptWaveform(samples, sampleRate = sampleRate)  
233 - while (model.isReady()) {  
234 - model.decode()  
235 - }  
236 -  
237 - var tailPaddings = FloatArray((sampleRate * 0.5).toInt()) // 0.5 seconds  
238 - model.acceptWaveform(tailPaddings, sampleRate = sampleRate)  
239 - model.inputFinished()  
240 - while (model.isReady()) {  
241 - model.decode()  
242 - }  
243 -  
244 - println("results: ${model.text}")  
245 -}  
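The monolithic `Main.kt` above is deleted; each feature now gets its own test file compiled separately (see the script changes below). Assuming the new `OnlineRecognizer` mirrors the `OfflineRecognizer` flow shown earlier (these method names are our assumption, not quoted from the commit), the old `testAsr` loop reduces to:

```kotlin
// Sketch of the streaming decode loop against the assumed OnlineRecognizer API.
fun decodeFile(recognizer: OnlineRecognizer, samples: FloatArray, sampleRate: Int): String {
    val stream = recognizer.createStream()
    stream.acceptWaveform(samples, sampleRate = sampleRate)
    while (recognizer.isReady(stream)) {
        recognizer.decode(stream)
    }
    // Flush with 0.5 s of silence, then finish the input, as the old test did.
    val tail = FloatArray((sampleRate * 0.5).toInt())
    stream.acceptWaveform(tail, sampleRate = sampleRate)
    stream.inputFinished()
    while (recognizer.isReady(stream)) {
        recognizer.decode(stream)
    }
    val text = recognizer.getResult(stream).text
    stream.release()
    return text
}
```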
  1 +../sherpa-onnx/kotlin-api/OfflineRecognizer.kt
1 -../android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/OfflineStream.kt  
  1 +../sherpa-onnx/kotlin-api/OfflineStream.kt
  1 +../sherpa-onnx/kotlin-api/OnlineRecognizer.kt
  1 +../sherpa-onnx/kotlin-api/OnlineStream.kt
1 -../android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt  
1 -../android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt  
1 -../android/SherpaOnnxSpeakerIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/speaker/identification/Speaker.kt  
  1 +../sherpa-onnx/kotlin-api/Speaker.kt
1 -../android/SherpaOnnxSpokenLanguageIdentification/app/src/main/java/com/k2fsa/sherpa/onnx/slid/SpokenLanguageIdentification.kt  
  1 +../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt
1 -../android/SherpaOnnxVad/app/src/main/java/com/k2fsa/sherpa/onnx/Vad.kt  
  1 +../sherpa-onnx/kotlin-api/Vad.kt
1 -../android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/WaveReader.kt  
  1 +../sherpa-onnx/kotlin-api/WaveReader.kt
@@ -44,9 +44,23 @@ function testSpeakerEmbeddingExtractor() {
44 if [ ! -f ./speaker2_a_cn_16k.wav ]; then 44 if [ ! -f ./speaker2_a_cn_16k.wav ]; then
45 curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav 45 curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav
46 fi 46 fi
  47 +
  48 + out_filename=test_speaker_id.jar
  49 + kotlinc-jvm -include-runtime -d $out_filename \
  50 + test_speaker_id.kt \
  51 + OnlineStream.kt \
  52 + Speaker.kt \
  53 + WaveReader.kt \
  54 + faked-asset-manager.kt \
  55 + faked-log.kt
  56 +
  57 + ls -lh $out_filename
  58 +
  59 + java -Djava.library.path=../build/lib -jar $out_filename
47 } 60 }
48 61
49 -function testAsr() { 62 +
  63 +function testOnlineAsr() {
50 if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then 64 if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then
51 git lfs install 65 git lfs install
52 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 66 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21
@@ -57,6 +71,20 @@ function testAsr() {
57 tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 71 tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
58 rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 72 rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
59 fi 73 fi
  74 +
  75 + out_filename=test_online_asr.jar
  76 + kotlinc-jvm -include-runtime -d $out_filename \
  77 + test_online_asr.kt \
  78 + FeatureConfig.kt \
  79 + OnlineRecognizer.kt \
  80 + OnlineStream.kt \
  81 + WaveReader.kt \
  82 + faked-asset-manager.kt \
  83 + faked-log.kt
  84 +
  85 + ls -lh $out_filename
  86 +
  87 + java -Djava.library.path=../build/lib -jar $out_filename
60 } 88 }
61 89
62 function testTts() { 90 function testTts() {
@@ -65,16 +93,42 @@ function testTts() {
65 tar xf vits-piper-en_US-amy-low.tar.bz2 93 tar xf vits-piper-en_US-amy-low.tar.bz2
66 rm vits-piper-en_US-amy-low.tar.bz2 94 rm vits-piper-en_US-amy-low.tar.bz2
67 fi 95 fi
  96 +
  97 + out_filename=test_tts.jar
  98 + kotlinc-jvm -include-runtime -d $out_filename \
  99 + test_tts.kt \
  100 + Tts.kt \
  101 + faked-asset-manager.kt \
  102 + faked-log.kt
  103 +
  104 + ls -lh $out_filename
  105 +
  106 + java -Djava.library.path=../build/lib -jar $out_filename
68 } 107 }
69 108
  109 +
70 function testAudioTagging() { 110 function testAudioTagging() {
71 if [ ! -d sherpa-onnx-zipformer-audio-tagging-2024-04-09 ]; then 111 if [ ! -d sherpa-onnx-zipformer-audio-tagging-2024-04-09 ]; then
72 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 112 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
73 tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 113 tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
74 rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 114 rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2
75 fi 115 fi
  116 +
  117 + out_filename=test_audio_tagging.jar
  118 + kotlinc-jvm -include-runtime -d $out_filename \
  119 + test_audio_tagging.kt \
  120 + AudioTagging.kt \
  121 + OfflineStream.kt \
  122 + WaveReader.kt \
  123 + faked-asset-manager.kt \
  124 + faked-log.kt
  125 +
  126 + ls -lh $out_filename
  127 +
  128 + java -Djava.library.path=../build/lib -jar $out_filename
76 129 }
77 130
  131 +
78 132 function testSpokenLanguageIdentification() {
79 133 if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then
80 134 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
@@ -87,50 +141,44 @@ function testSpokenLanguageIdentification() {
87 141 tar xvf spoken-language-identification-test-wavs.tar.bz2
88 142 rm spoken-language-identification-test-wavs.tar.bz2
89 143 fi
90 -}  
91 144
92 -function test() {  
93 - testSpokenLanguageIdentification  
94 - testAudioTagging  
95 - testSpeakerEmbeddingExtractor  
96 - testAsr  
97 - testTts  
98 -}
  145 + out_filename=test_language_id.jar
  146 + kotlinc-jvm -include-runtime -d $out_filename \
  147 + test_language_id.kt \
  148 + SpokenLanguageIdentification.kt \
  149 + OfflineStream.kt \
  150 + WaveReader.kt \
  151 + faked-asset-manager.kt \
  152 + faked-log.kt
99 153
100 -test  
101 -  
102 -kotlinc-jvm -include-runtime -d main.jar \  
103 - AudioTagging.kt \  
104 - Main.kt \  
105 - OfflineStream.kt \  
106 - SherpaOnnx.kt \  
107 - Speaker.kt \  
108 - SpokenLanguageIdentification.kt \  
109 - Tts.kt \  
110 - WaveReader.kt \  
111 - faked-asset-manager.kt \  
112 - faked-log.kt  
113 -  
114 -ls -lh main.jar  
115 -  
116 -java -Djava.library.path=../build/lib -jar main.jar  
117 -  
118 -function testTwoPass() {  
119 - if [ ! -f ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/encoder-epoch-99-avg-1.int8.onnx ]; then  
120 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2  
121 - tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2  
122 - rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2  
123 - fi
  154 + ls -lh $out_filename
  155 +
  156 + java -Djava.library.path=../build/lib -jar $out_filename
  157 +}
124 158
  159 +function testOfflineAsr() {
125 160 if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then
126 161 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
127 162 tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
128 163 rm sherpa-onnx-whisper-tiny.en.tar.bz2
129 164 fi
130 165
131 - kotlinc-jvm -include-runtime -d 2pass.jar test-2pass.kt WaveReader.kt SherpaOnnx2Pass.kt faked-asset-manager.kt  
132 - ls -lh 2pass.jar  
133 - java -Djava.library.path=../build/lib -jar 2pass.jar
  166 + out_filename=test_offline_asr.jar
  167 + kotlinc-jvm -include-runtime -d $out_filename \
  168 + test_offline_asr.kt \
  169 + FeatureConfig.kt \
  170 + OfflineRecognizer.kt \
  171 + OfflineStream.kt \
  172 + WaveReader.kt \
  173 + faked-asset-manager.kt
  174 +
  175 + ls -lh $out_filename
  176 + java -Djava.library.path=../build/lib -jar $out_filename
134 177 }
135 178
136 -testTwoPass
  179 +testSpeakerEmbeddingExtractor
  180 +testOnlineAsr
  181 +testTts
  182 +testAudioTagging
  183 +testSpokenLanguageIdentification
  184 +testOfflineAsr
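
Each test jar above is run with -Djava.library.path=../build/lib because the Kotlin bindings load the native JNI library when their classes are first initialized. A minimal sketch of that mechanism, assuming the shared library is named sherpa-onnx-jni (i.e., libsherpa-onnx-jni.so produced by the CMake build):

    // Sketch only: how the bindings typically pull in the JNI library.
    // "sherpa-onnx-jni" resolves to libsherpa-onnx-jni.so via java.library.path.
    class NativeLibrary {
        companion object {
            init {
                System.loadLibrary("sherpa-onnx-jni")
            }
        }
    }

If the library cannot be found on java.library.path, the JVM throws UnsatisfiedLinkError at class-initialization time, which is why every test passes the flag explicitly.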
1 -package com.k2fsa.sherpa.onnx  
2 -  
3 -fun main() {  
4 - test2Pass()  
5 -}  
6 -  
7 -fun test2Pass() {  
8 - val firstPass = createFirstPass()  
9 - val secondPass = createSecondPass()  
10 -  
11 - val waveFilename = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav"  
12 -  
13 - var objArray = WaveReader.readWaveFromFile(  
14 - filename = waveFilename,  
15 - )  
16 - var samples: FloatArray = objArray[0] as FloatArray  
17 - var sampleRate: Int = objArray[1] as Int  
18 -  
19 - firstPass.acceptWaveform(samples, sampleRate = sampleRate)  
20 - while (firstPass.isReady()) {  
21 - firstPass.decode()  
22 - }  
23 -  
24 - var text = firstPass.text  
25 - println("First pass text: $text")  
26 -  
27 - text = secondPass.decode(samples, sampleRate)  
28 - println("Second pass text: $text")  
29 -}  
30 -  
31 -fun createFirstPass(): SherpaOnnx {  
32 - val config = OnlineRecognizerConfig(  
33 - featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),  
34 - modelConfig = getModelConfig(type = 1)!!,  
35 - endpointConfig = getEndpointConfig(),  
36 - enableEndpoint = true,  
37 - )  
38 -  
39 - return SherpaOnnx(config = config)  
40 -}  
41 -  
42 -fun createSecondPass(): SherpaOnnxOffline {  
43 - val config = OfflineRecognizerConfig(  
44 - featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),  
45 - modelConfig = getOfflineModelConfig(type = 2)!!,  
46 - )  
47 -  
48 - return SherpaOnnxOffline(config = config)  
49 -}  
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + testAudioTagging()
  5 +}
  6 +
  7 +fun testAudioTagging() {
  8 + val config = AudioTaggingConfig(
  9 + model=AudioTaggingModelConfig(
  10 + zipformer=OfflineZipformerAudioTaggingModelConfig(
  11 + model="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx",
  12 + ),
  13 + numThreads=1,
  14 + debug=true,
  15 + provider="cpu",
  16 + ),
  17 + labels="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv",
  18 + topK=5,
  19 + )
  20 + val tagger = AudioTagging(config=config)
  21 +
  22 + val testFiles = arrayOf(
  23 + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav",
  24 + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/2.wav",
  25 + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/3.wav",
  26 + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/4.wav",
  27 + )
  28 + println("----------")
  29 + for (waveFilename in testFiles) {
  30 + val stream = tagger.createStream()
  31 +
  32 + val objArray = WaveReader.readWaveFromFile(
  33 + filename = waveFilename,
  34 + )
  35 + val samples: FloatArray = objArray[0] as FloatArray
  36 + val sampleRate: Int = objArray[1] as Int
  37 +
  38 + stream.acceptWaveform(samples, sampleRate = sampleRate)
  39 + val events = tagger.compute(stream)
  40 + stream.release()
  41 +
  42 + println(waveFilename)
  43 + println(events)
  44 + println("----------")
  45 + }
  46 +
  47 + tagger.release()
  48 +}
  49 +
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + testSpokenLanguageIdentification()
  5 +}
  6 +
  7 +fun testSpokenLanguageIdentification() {
  8 + val config = SpokenLanguageIdentificationConfig(
  9 + whisper = SpokenLanguageIdentificationWhisperConfig(
  10 + encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx",
  11 + decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx",
  12 + tailPaddings = 33,
  13 + ),
  14 + numThreads=1,
  15 + debug=true,
  16 + provider="cpu",
  17 + )
  18 + val slid = SpokenLanguageIdentification(config=config)
  19 +
  20 + val testFiles = arrayOf(
  21 + "./spoken-language-identification-test-wavs/ar-arabic.wav",
  22 + "./spoken-language-identification-test-wavs/bg-bulgarian.wav",
  23 + "./spoken-language-identification-test-wavs/de-german.wav",
  24 + )
  25 +
  26 + for (waveFilename in testFiles) {
  27 + val objArray = WaveReader.readWaveFromFile(
  28 + filename = waveFilename,
  29 + )
  30 + val samples: FloatArray = objArray[0] as FloatArray
  31 + val sampleRate: Int = objArray[1] as Int
  32 +
  33 + val stream = slid.createStream()
  34 + stream.acceptWaveform(samples, sampleRate = sampleRate)
  35 + val lang = slid.compute(stream)
  36 + stream.release()
  37 + println(waveFilename)
  38 + println(lang)
  39 + }
  40 +
  41 + slid.release()
  42 +}
  43 +
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + val recognizer = createOfflineRecognizer()
  5 +
  6 + val waveFilename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav"
  7 +
  8 + val objArray = WaveReader.readWaveFromFile(
  9 + filename = waveFilename,
  10 + )
  11 + val samples: FloatArray = objArray[0] as FloatArray
  12 + val sampleRate: Int = objArray[1] as Int
  13 +
  14 + val stream = recognizer.createStream()
  15 + stream.acceptWaveform(samples, sampleRate=sampleRate)
  16 + recognizer.decode(stream)
  17 +
  18 + val result = recognizer.getResult(stream)
  19 + println(result)
  20 +
  21 + stream.release()
  22 + recognizer.release()
  23 +}
  24 +
  25 +fun createOfflineRecognizer(): OfflineRecognizer {
  26 + val config = OfflineRecognizerConfig(
  27 + featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
  28 + modelConfig = getOfflineModelConfig(type = 2)!!,
  29 + )
  30 +
  31 + return OfflineRecognizer(config = config)
  32 +}
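
getOfflineModelConfig(type = 2) hides the concrete model paths behind a helper. For readers who want to see such a configuration spelled out, here is a hedged sketch using the whisper-tiny.en model downloaded by testOfflineAsr; the field names (whisper, OfflineWhisperModelConfig, modelType) are assumptions to be checked against OfflineRecognizer.kt:

    // Sketch: an explicit Whisper configuration instead of the type-2 helper.
    // Field names are assumed from the Kotlin bindings; verify in OfflineRecognizer.kt.
    fun createWhisperRecognizer(): OfflineRecognizer {
        val config = OfflineRecognizerConfig(
            featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
            modelConfig = OfflineModelConfig(
                whisper = OfflineWhisperModelConfig(
                    encoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx",
                    decoder = "./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx",
                ),
                tokens = "./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt",
                modelType = "whisper",
            ),
        )
        return OfflineRecognizer(config = config)
    }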
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + testOnlineAsr("transducer")
  5 + testOnlineAsr("zipformer2-ctc")
  6 +}
  7 +
  8 +fun testOnlineAsr(type: String) {
  9 + val featConfig = FeatureConfig(
  10 + sampleRate = 16000,
  11 + featureDim = 80,
  12 + )
  13 +
  14 + val waveFilename: String
  15 + val modelConfig: OnlineModelConfig = when (type) {
  16 + "transducer" -> {
  17 + waveFilename = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/test_wavs/0.wav"
  18 + // please refer to
  19 + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  20 + // to download pre-trained models
  21 + OnlineModelConfig(
  22 + transducer = OnlineTransducerModelConfig(
  23 + encoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/encoder-epoch-99-avg-1.onnx",
  24 + decoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/decoder-epoch-99-avg-1.onnx",
  25 + joiner = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/joiner-epoch-99-avg-1.onnx",
  26 + ),
  27 + tokens = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt",
  28 + numThreads = 1,
  29 + debug = false,
  30 + )
  31 + }
  32 + "zipformer2-ctc" -> {
  33 + waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav"
  34 + OnlineModelConfig(
  35 + zipformer2Ctc = OnlineZipformer2CtcModelConfig(
  36 + model = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx",
  37 + ),
  38 + tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt",
  39 + numThreads = 1,
  40 + debug = false,
  41 + )
  42 + }
  43 + else -> throw IllegalArgumentException(type)
  44 + }
  45 +
  46 + val endpointConfig = EndpointConfig()
  47 +
  48 + val lmConfig = OnlineLMConfig()
  49 +
  50 + val config = OnlineRecognizerConfig(
  51 + modelConfig = modelConfig,
  52 + lmConfig = lmConfig,
  53 + featConfig = featConfig,
  54 + endpointConfig = endpointConfig,
  55 + enableEndpoint = true,
  56 + decodingMethod = "greedy_search",
  57 + maxActivePaths = 4,
  58 + )
  59 +
  60 + val recognizer = OnlineRecognizer(
  61 + config = config,
  62 + )
  63 +
  64 + val objArray = WaveReader.readWaveFromFile(
  65 + filename = waveFilename,
  66 + )
  67 + val samples: FloatArray = objArray[0] as FloatArray
  68 + val sampleRate: Int = objArray[1] as Int
  69 +
  70 + val stream = recognizer.createStream()
  71 + stream.acceptWaveform(samples, sampleRate = sampleRate)
  72 + while (recognizer.isReady(stream)) {
  73 + recognizer.decode(stream)
  74 + }
  75 +
  76 + val tailPaddings = FloatArray((sampleRate * 0.5).toInt()) // 0.5 seconds
  77 + stream.acceptWaveform(tailPaddings, sampleRate = sampleRate)
  78 + stream.inputFinished()
  79 + while (recognizer.isReady(stream)) {
  80 + recognizer.decode(stream)
  81 + }
  82 +
  83 + println("results: ${recognizer.getResult(stream).text}")
  84 +
  85 + stream.release()
  86 + recognizer.release()
  87 +}
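
testOnlineAsr decodes a single file end to end. Driving the same recognizer from a live audio source usually adds endpoint detection so each utterance is printed and the stream reset; a hedged sketch of that loop, where isEndpoint()/reset() are assumed API names based on the Android demo's usage and nextChunk() stands in for a hypothetical audio source:

    // Sketch: chunked streaming decode with endpointing (microphone-style loop).
    // nextChunk() is hypothetical; isEndpoint()/reset() are assumed API names.
    fun decodeLive(recognizer: OnlineRecognizer, nextChunk: () -> FloatArray?) {
        val stream = recognizer.createStream()
        while (true) {
            val chunk = nextChunk() ?: break // null means the source is exhausted
            stream.acceptWaveform(chunk, sampleRate = 16000)
            while (recognizer.isReady(stream)) {
                recognizer.decode(stream)
            }
            if (recognizer.isEndpoint(stream)) {
                println("segment: ${recognizer.getResult(stream).text}")
                recognizer.reset(stream) // start a fresh utterance
            }
        }
        stream.release()
    }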
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + testSpeakerRecognition()
  5 +}
  6 +
  7 +fun testSpeakerRecognition() {
  8 + val config = SpeakerEmbeddingExtractorConfig(
  9 + model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx",
  10 + )
  11 + val extractor = SpeakerEmbeddingExtractor(config = config)
  12 +
  13 + val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav")
  14 + val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav")
  15 + val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav")
  16 +
  17 + var manager = SpeakerEmbeddingManager(extractor.dim())
  18 + var ok = manager.add(name = "speaker1", embedding=embedding1a)
  19 + check(ok)
  20 +
  21 + ok = manager.add(name = "speaker2", embedding=embedding2a)
  22 + check(ok)
  23 +
  24 + var name = manager.search(embedding=embedding1b, threshold=0.5f)
  25 + check(name == "speaker1")
  26 +
  27 + manager.release()
  28 +
  29 + manager = SpeakerEmbeddingManager(extractor.dim())
  30 + val embeddingList = mutableListOf(embedding1a, embedding1b)
  31 + ok = manager.add(name = "s1", embedding=embeddingList.toTypedArray())
  32 + check(ok)
  33 +
  34 + name = manager.search(embedding=embedding1b, threshold=0.5f)
  35 + check(name == "s1")
  36 +
  37 + name = manager.search(embedding=embedding2a, threshold=0.5f)
  38 + check(name.length == 0)
  39 +
  40 + manager.release()
  41 + extractor.release()
  42 + println("Speaker ID test done!")
  43 +}
  44 +
  45 +fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray {
  46 + var objArray = WaveReader.readWaveFromFile(
  47 + filename = filename,
  48 + )
  49 + var samples: FloatArray = objArray[0] as FloatArray
  50 + var sampleRate: Int = objArray[1] as Int
  51 +
  52 + val stream = extractor.createStream()
  53 + stream.acceptWaveform(sampleRate = sampleRate, samples=samples)
  54 + stream.inputFinished()
  55 + check(extractor.isReady(stream))
  56 +
  57 + val embedding = extractor.compute(stream)
  58 +
  59 + stream.release()
  60 +
  61 + return embedding
  62 +}
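
manager.search(embedding=..., threshold=0.5f) returns a name only when some enrolled embedding scores above the threshold; the score behind such a threshold is typically cosine similarity between embeddings. A self-contained sketch of that computation in plain Kotlin (no sherpa-onnx API involved):

    import kotlin.math.sqrt

    // Cosine similarity between two speaker embeddings; values near 1.0 suggest
    // the same speaker, and a threshold like 0.5f separates same from different.
    fun cosineSimilarity(a: FloatArray, b: FloatArray): Float {
        require(a.size == b.size) { "embedding dims differ" }
        var dot = 0.0f
        var na = 0.0f
        var nb = 0.0f
        for (i in a.indices) {
            dot += a[i] * b[i]
            na += a[i] * a[i]
            nb += b[i] * b[i]
        }
        return dot / (sqrt(na) * sqrt(nb))
    }

For example, cosineSimilarity(embedding1a, embedding1b) should exceed 0.5f in the test above, while cosineSimilarity(embedding1a, embedding2a) should not.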
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + testTts()
  5 +}
  6 +
  7 +fun testTts() {
  8 + // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  9 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  10 + val config = OfflineTtsConfig(
  11 + model=OfflineTtsModelConfig(
  12 + vits=OfflineTtsVitsModelConfig(
  13 + model="./vits-piper-en_US-amy-low/en_US-amy-low.onnx",
  14 + tokens="./vits-piper-en_US-amy-low/tokens.txt",
  15 + dataDir="./vits-piper-en_US-amy-low/espeak-ng-data",
  16 + ),
  17 + numThreads=1,
  18 + debug=true,
  19 + )
  20 + )
  21 + val tts = OfflineTts(config=config)
  22 + val audio = tts.generateWithCallback(text="“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”", callback=::callback)
  23 + audio.save(filename="test-en.wav")
  24 + tts.release()
  25 + println("Saved to test-en.wav")
  26 +}
  27 +
  28 +fun callback(samples: FloatArray): Unit {
  29 + println("callback got called with ${samples.size} samples")
  30 +}
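
generateWithCallback above relies on default values for the speaker id and speed. A hedged sketch of setting them explicitly, assuming the generate(text, sid, speed) overload in Tts.kt; vits-piper-en_US-amy-low is a single-speaker voice, so only sid = 0 is valid:

    // Sketch: assumed generate(text, sid, speed) overload from Tts.kt.
    // speed > 1.0f speaks faster; sid selects a voice in multi-speaker models.
    val fastAudio = tts.generate(
        text = "Hello from sherpa-onnx.",
        sid = 0,
        speed = 1.2f,
    )
    fastAudio.save(filename = "test-en-fast.wav")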
  1 +#!/usr/bin/env bash
  2 +#
  3 +# Auto generated! Please DO NOT EDIT!
  4 +
  5 +# Please set the environment variable ANDROID_NDK
  6 +# before running this script
  7 +
  8 +# Inside the $ANDROID_NDK directory, you can find a binary ndk-build
  9 +# and some other files like the file "build/cmake/android.toolchain.cmake"
  10 +
  11 +set -ex
  12 +
  13 +log() {
  14 + # This function is from espnet
  15 + local fname=${BASH_SOURCE[1]##*/}
  16 + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
  17 +}
  18 +
  19 +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
  20 +
  21 +log "Building streaming ASR APK for sherpa-onnx v${SHERPA_ONNX_VERSION}"
  22 +
  23 +export SHERPA_ONNX_ENABLE_TTS=OFF
  24 +
  25 +log "====================arm64-v8a================="
  26 +./build-android-arm64-v8a.sh
  27 +log "====================armv7-eabi================"
  28 +./build-android-armv7-eabi.sh
  29 +log "====================x86-64===================="
  30 +./build-android-x86-64.sh
  31 +log "====================x86===================="
  32 +./build-android-x86.sh
  33 +
  34 +mkdir -p apks
  35 +
  36 +{% for model in model_list %}
  37 +pushd ./android/SherpaOnnx/app/src/main/assets/
  38 +model_name={{ model.model_name }}
  39 +type={{ model.idx }}
  40 +lang={{ model.lang }}
  41 +short_name={{ model.short_name }}
  42 +
  43 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
  44 +tar xvf ${model_name}.tar.bz2
  45 +
  46 +{{ model.cmd }}
  47 +
  48 +rm -rf *.tar.bz2
  49 +ls -lh $model_name
  50 +
  51 +popd
  52 +# Now we are at the project root directory
  53 +
  54 +git checkout .
  55 +pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx
  56 +sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt
  57 +git diff
  58 +popd
  59 +
  60 +for arch in arm64-v8a armeabi-v7a x86_64 x86; do
  61 + log "------------------------------------------------------------"
  62 + log "build ASR apk for $arch"
  63 + log "------------------------------------------------------------"
  64 + src_arch=$arch
  65 + if [ $arch == "armeabi-v7a" ]; then
  66 + src_arch=armv7-eabi
  67 + elif [ $arch == "x86_64" ]; then
  68 + src_arch=x86-64
  69 + fi
  70 +
  71 + ls -lh ./build-android-$src_arch/install/lib/*.so
  72 +
  73 + cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnx/app/src/main/jniLibs/$arch/
  74 +
  75 + pushd ./android/SherpaOnnx
  76 + sed -i.bak s/2048/9012/g ./gradle.properties
  77 + git diff ./gradle.properties
  78 + ./gradlew assembleRelease
  79 + popd
  80 +
  81 + mv android/SherpaOnnx/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-asr-$lang-$short_name.apk
  82 + ls -lh apks
  83 + rm -v ./android/SherpaOnnx/app/src/main/jniLibs/$arch/*.so
  84 +done
  85 +
  86 +rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name
  87 +{% endfor %}
  88 +
  89 +git checkout .
  90 +
  91 +ls -lh apks/
@@ -29,6 +29,8 @@ log "====================x86-64===================="
29 29 log "====================x86===================="
30 30 ./build-android-x86.sh
31 31
  32 +export SHERPA_ONNX_ENABLE_TTS=OFF
  33 +
32 34 mkdir -p apks
33 35
34 36 {% for model in model_list %}
@@ -29,6 +29,8 @@ log "====================x86-64===================="
29 29 log "====================x86===================="
30 30 ./build-android-x86.sh
31 31
  32 +export SHERPA_ONNX_ENABLE_TTS=OFF
  33 +
32 34 mkdir -p apks
33 35
34 36 {% for model in model_list %}
@@ -29,6 +29,8 @@ log "====================x86-64===================="
29 29 log "====================x86===================="
30 30 ./build-android-x86.sh
31 31
  32 +export SHERPA_ONNX_ENABLE_TTS=OFF
  33 +
32 34 mkdir -p apks
33 35
34 36 {% for model in model_list %}
@@ -29,6 +29,8 @@ log "====================x86-64===================="
29 29 log "====================x86===================="
30 30 ./build-android-x86.sh
31 31
  32 +export SHERPA_ONNX_ENABLE_TTS=OFF
  33 +
32 34 mkdir -p apks
33 35
34 36 {% for model in model_list %}
@@ -29,6 +29,8 @@ log "====================x86-64===================="
29 29 log "====================x86===================="
30 30 ./build-android-x86.sh
31 31
  32 +export SHERPA_ONNX_ENABLE_TTS=ON
  33 +
32 34 mkdir -p apks
33 35
34 36 {% for tts_model in tts_model_list %}
@@ -29,6 +29,8 @@ log "====================x86-64===================="
29 29 log "====================x86===================="
30 30 ./build-android-x86.sh
31 31
  32 +export SHERPA_ONNX_ENABLE_TTS=ON
  33 +
32 34 mkdir -p apks
33 35
34 36 {% for tts_model in tts_model_list %}
  1 +#!/usr/bin/env python3
  2 +
  3 +import argparse
  4 +from dataclasses import dataclass
  5 +from typing import List, Optional
  6 +
  7 +import jinja2
  8 +
  9 +
  10 +def get_args():
  11 + parser = argparse.ArgumentParser()
  12 + parser.add_argument(
  13 + "--total",
  14 + type=int,
  15 + default=1,
  16 + help="Number of runners",
  17 + )
  18 + parser.add_argument(
  19 + "--index",
  20 + type=int,
  21 + default=0,
  22 + help="Index of the current runner",
  23 + )
  24 + return parser.parse_args()
  25 +
  26 +
  27 +@dataclass
  28 +class Model:
  29 + # We will download
  30 + # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
  31 + model_name: str
  32 +
  33 + # The type of the model, e.g., 0, 1, 2. It is hardcoded in the Kotlin code
  34 + idx: int
  35 +
  36 + # e.g., zh, en, zh_en
  37 + lang: str
  38 +
  39 + # e.g., whisper, paraformer, zipformer
  40 + short_name: str = ""
  41 +
  42 + # cmd is used to remove extra files from the model directory
  43 + cmd: str = ""
  44 +
  45 +
  46 +def get_models():
  47 + models = [
  48 + Model(
  49 + model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
  50 + idx=8,
  51 + lang="bilingual_zh_en",
  52 + short_name="zipformer",
  53 + cmd="""
  54 + pushd $model_name
  55 + rm -v decoder-epoch-99-avg-1.int8.onnx
  56 + rm -v encoder-epoch-99-avg-1.onnx
  57 + rm -v joiner-epoch-99-avg-1.onnx
  58 +
  59 + rm -v *.sh
  60 + rm -v .gitattributes
  61 + rm -v *state*
  62 + rm -rfv test_wavs
  63 +
  64 + ls -lh
  65 +
  66 + popd
  67 + """,
  68 + ),
  69 + ]
  70 +
  71 + return models
  72 +
  73 +
  74 +def main():
  75 + args = get_args()
  76 + index = args.index
  77 + total = args.total
  78 + assert 0 <= index < total, (index, total)
  79 +
  80 + all_model_list = get_models()
  81 +
  82 + num_models = len(all_model_list)
  83 +
  84 + num_per_runner = num_models // total
  85 + if num_per_runner <= 0:
  86 + raise ValueError(f"num_models: {num_models}, num_runners: {total}")
  87 +
  88 + start = index * num_per_runner
  89 + end = start + num_per_runner
  90 +
  91 + remaining = num_models - args.total * num_per_runner
  92 +
  93 + print(f"{index}/{total}: {start}-{end}/{num_models}")
  94 +
  95 + d = dict()
  96 + d["model_list"] = all_model_list[start:end]
  97 + if index < remaining:
  98 + s = args.total * num_per_runner + index
  99 + d["model_list"].append(all_model_list[s])
  100 + print(f"{s}/{num_models}")
  101 +
  102 + filename_list = [
  103 + "./build-apk-asr.sh",
  104 + ]
  105 + for filename in filename_list:
  106 + environment = jinja2.Environment()
  107 + with open(f"{filename}.in") as f:
  108 + s = f.read()
  109 + template = environment.from_string(s)
  110 +
  111 + s = template.render(**d)
  112 + with open(filename, "w") as f:
  113 + print(s, file=f)
  114 +
  115 +
  116 +if __name__ == "__main__":
  117 + main()
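
As a worked example of the splitting logic above: with 5 models and --total=2, num_per_runner is 2 and remaining is 1, so runner 0 renders models 0 and 1 plus leftover model 4, while runner 1 renders models 2 and 3.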
@@ -82,7 +82,7 @@ bool OfflineTtsVitsModelConfig::Validate() const {
82 82
83 83 for (const auto &f : required_files) {
84 84 if (!FileExists(dict_dir + "/" + f)) {
85 - SHERPA_ONNX_LOGE("'%s/%s' does not exist.", data_dir.c_str(),
  85 + SHERPA_ONNX_LOGE("'%s/%s' does not exist.", dict_dir.c_str(),
86 86 f.c_str());
87 87 return false;
88 88 }
@@ -12,8 +12,15 @@ endif()
12 12 set(sources
13 13 audio-tagging.cc
14 14 jni.cc
  15 + keyword-spotter.cc
  16 + offline-recognizer.cc
15 17 offline-stream.cc
  18 + online-recognizer.cc
  19 + online-stream.cc
  20 + speaker-embedding-extractor.cc
  21 + speaker-embedding-manager.cc
16 22 spoken-language-identification.cc
  23 + voice-activity-detector.cc
17 24 )
18 25
19 26 if(SHERPA_ONNX_ENABLE_TTS)
@@ -6,6 +6,8 @@
6 6 #define SHERPA_ONNX_JNI_COMMON_H_
7 7
8 8 #if __ANDROID_API__ >= 9
  9 +#include <strstream>
  10 +
9 11 #include "android/asset_manager.h"
10 12 #include "android/asset_manager_jni.h"
11 13 #endif