Committed by
GitHub
Refactor the JNI interface to make it more modular and maintainable (#802)
正在显示
98 个修改的文件
包含
1141 行增加
和
957 行删除
.github/workflows/apk-asr.yaml
0 → 100644
| 1 | +name: apk-asr | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + tags: | ||
| 6 | + - '*' | ||
| 7 | + | ||
| 8 | + workflow_dispatch: | ||
| 9 | + | ||
| 10 | +concurrency: | ||
| 11 | + group: apk-asr-${{ github.ref }} | ||
| 12 | + cancel-in-progress: true | ||
| 13 | + | ||
| 14 | +permissions: | ||
| 15 | + contents: write | ||
| 16 | + | ||
| 17 | +jobs: | ||
| 18 | + apk_asr: | ||
| 19 | + if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa' | ||
| 20 | + runs-on: ${{ matrix.os }} | ||
| 21 | + name: apk for asr ${{ matrix.index }}/${{ matrix.total }} | ||
| 22 | + strategy: | ||
| 23 | + fail-fast: false | ||
| 24 | + matrix: | ||
| 25 | + os: [ubuntu-latest] | ||
| 26 | + total: ["1"] | ||
| 27 | + index: ["0"] | ||
| 28 | + | ||
| 29 | + steps: | ||
| 30 | + - uses: actions/checkout@v4 | ||
| 31 | + with: | ||
| 32 | + fetch-depth: 0 | ||
| 33 | + | ||
| 34 | + # https://github.com/actions/setup-java | ||
| 35 | + - uses: actions/setup-java@v4 | ||
| 36 | + with: | ||
| 37 | + distribution: 'temurin' # See 'Supported distributions' for available options | ||
| 38 | + java-version: '21' | ||
| 39 | + | ||
| 40 | + - name: ccache | ||
| 41 | + uses: hendrikmuhs/ccache-action@v1.2 | ||
| 42 | + with: | ||
| 43 | + key: ${{ matrix.os }}-android | ||
| 44 | + | ||
| 45 | + - name: Display NDK HOME | ||
| 46 | + shell: bash | ||
| 47 | + run: | | ||
| 48 | + echo "ANDROID_NDK_LATEST_HOME: ${ANDROID_NDK_LATEST_HOME}" | ||
| 49 | + ls -lh ${ANDROID_NDK_LATEST_HOME} | ||
| 50 | + | ||
| 51 | + - name: Install Python dependencies | ||
| 52 | + shell: bash | ||
| 53 | + run: | | ||
| 54 | + python3 -m pip install --upgrade pip jinja2 | ||
| 55 | + | ||
| 56 | + - name: Setup build tool version variable | ||
| 57 | + shell: bash | ||
| 58 | + run: | | ||
| 59 | + echo "---" | ||
| 60 | + ls -lh /usr/local/lib/android/ | ||
| 61 | + echo "---" | ||
| 62 | + | ||
| 63 | + ls -lh /usr/local/lib/android/sdk | ||
| 64 | + echo "---" | ||
| 65 | + | ||
| 66 | + ls -lh /usr/local/lib/android/sdk/build-tools | ||
| 67 | + echo "---" | ||
| 68 | + | ||
| 69 | + BUILD_TOOL_VERSION=$(ls /usr/local/lib/android/sdk/build-tools/ | tail -n 1) | ||
| 70 | + echo "BUILD_TOOL_VERSION=$BUILD_TOOL_VERSION" >> $GITHUB_ENV | ||
| 71 | + echo "Last build tool version is: $BUILD_TOOL_VERSION" | ||
| 72 | + | ||
| 73 | + - name: Generate build script | ||
| 74 | + shell: bash | ||
| 75 | + run: | | ||
| 76 | + cd scripts/apk | ||
| 77 | + | ||
| 78 | + total=${{ matrix.total }} | ||
| 79 | + index=${{ matrix.index }} | ||
| 80 | + | ||
| 81 | + ./generate-asr-apk-script.py --total $total --index $index | ||
| 82 | + | ||
| 83 | + chmod +x build-apk-asr.sh | ||
| 84 | + mv -v ./build-apk-asr.sh ../.. | ||
| 85 | + | ||
| 86 | + - name: build APK | ||
| 87 | + shell: bash | ||
| 88 | + run: | | ||
| 89 | + export CMAKE_CXX_COMPILER_LAUNCHER=ccache | ||
| 90 | + export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH" | ||
| 91 | + cmake --version | ||
| 92 | + | ||
| 93 | + export ANDROID_NDK=$ANDROID_NDK_LATEST_HOME | ||
| 94 | + ./build-apk-asr.sh | ||
| 95 | + | ||
| 96 | + - name: Display APK | ||
| 97 | + shell: bash | ||
| 98 | + run: | | ||
| 99 | + ls -lh ./apks/ | ||
| 100 | + du -h -d1 . | ||
| 101 | + | ||
| 102 | + # https://github.com/marketplace/actions/sign-android-release | ||
| 103 | + - uses: r0adkll/sign-android-release@v1 | ||
| 104 | + name: Sign app APK | ||
| 105 | + with: | ||
| 106 | + releaseDirectory: ./apks | ||
| 107 | + signingKeyBase64: ${{ secrets.ANDROID_SIGNING_KEY }} | ||
| 108 | + alias: ${{ secrets.ANDROID_SIGNING_KEY_ALIAS }} | ||
| 109 | + keyStorePassword: ${{ secrets.ANDROID_SIGNING_KEY_STORE_PASSWORD }} | ||
| 110 | + env: | ||
| 111 | + BUILD_TOOLS_VERSION: ${{ env.BUILD_TOOL_VERSION }} | ||
| 112 | + | ||
| 113 | + - name: Display APK after signing | ||
| 114 | + shell: bash | ||
| 115 | + run: | | ||
| 116 | + ls -lh ./apks/ | ||
| 117 | + du -h -d1 . | ||
| 118 | + | ||
| 119 | + - name: Rename APK after signing | ||
| 120 | + shell: bash | ||
| 121 | + run: | | ||
| 122 | + cd apks | ||
| 123 | + rm -fv signingKey.jks | ||
| 124 | + rm -fv *.apk.idsig | ||
| 125 | + rm -fv *-aligned.apk | ||
| 126 | + | ||
| 127 | + all_apks=$(ls -1 *-signed.apk) | ||
| 128 | + echo "----" | ||
| 129 | + echo $all_apks | ||
| 130 | + echo "----" | ||
| 131 | + for apk in ${all_apks[@]}; do | ||
| 132 | + n=$(echo $apk | sed -e s/-signed//) | ||
| 133 | + mv -v $apk $n | ||
| 134 | + done | ||
| 135 | + | ||
| 136 | + cd .. | ||
| 137 | + | ||
| 138 | + ls -lh ./apks/ | ||
| 139 | + du -h -d1 . | ||
| 140 | + | ||
| 141 | + - name: Display APK after rename | ||
| 142 | + shell: bash | ||
| 143 | + run: | | ||
| 144 | + ls -lh ./apks/ | ||
| 145 | + du -h -d1 . | ||
| 146 | + | ||
| 147 | + - name: Publish to huggingface | ||
| 148 | + env: | ||
| 149 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 150 | + uses: nick-fields/retry@v3 | ||
| 151 | + with: | ||
| 152 | + max_attempts: 20 | ||
| 153 | + timeout_seconds: 200 | ||
| 154 | + shell: bash | ||
| 155 | + command: | | ||
| 156 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 157 | + git config --global user.name "Fangjun Kuang" | ||
| 158 | + | ||
| 159 | + rm -rf huggingface | ||
| 160 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 161 | + | ||
| 162 | + git clone https://huggingface.co/csukuangfj/sherpa-onnx-apk huggingface | ||
| 163 | + cd huggingface | ||
| 164 | + git fetch | ||
| 165 | + git pull | ||
| 166 | + git merge -m "merge remote" --ff origin main | ||
| 167 | + | ||
| 168 | + mkdir -p asr | ||
| 169 | + cp -v ../apks/*.apk ./asr/ | ||
| 170 | + git status | ||
| 171 | + git lfs track "*.apk" | ||
| 172 | + git add . | ||
| 173 | + git commit -m "add more apks" | ||
| 174 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-apk main |
| @@ -95,3 +95,4 @@ sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 | @@ -95,3 +95,4 @@ sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 | ||
| 95 | spoken-language-identification-test-wavs | 95 | spoken-language-identification-test-wavs |
| 96 | my-release-key* | 96 | my-release-key* |
| 97 | vits-zh-hf-fanchen-C | 97 | vits-zh-hf-fanchen-C |
| 98 | +sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 |
| @@ -16,6 +16,7 @@ | @@ -16,6 +16,7 @@ | ||
| 16 | tools:targetApi="31"> | 16 | tools:targetApi="31"> |
| 17 | <activity | 17 | <activity |
| 18 | android:name=".MainActivity" | 18 | android:name=".MainActivity" |
| 19 | + android:label="ASR: Next-gen Kaldi" | ||
| 19 | android:exported="true"> | 20 | android:exported="true"> |
| 20 | <intent-filter> | 21 | <intent-filter> |
| 21 | <action android:name="android.intent.action.MAIN" /> | 22 | <action android:name="android.intent.action.MAIN" /> |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt |
| @@ -12,16 +12,19 @@ import android.widget.Button | @@ -12,16 +12,19 @@ import android.widget.Button | ||
| 12 | import android.widget.TextView | 12 | import android.widget.TextView |
| 13 | import androidx.appcompat.app.AppCompatActivity | 13 | import androidx.appcompat.app.AppCompatActivity |
| 14 | import androidx.core.app.ActivityCompat | 14 | import androidx.core.app.ActivityCompat |
| 15 | -import com.k2fsa.sherpa.onnx.* | ||
| 16 | import kotlin.concurrent.thread | 15 | import kotlin.concurrent.thread |
| 17 | 16 | ||
| 18 | private const val TAG = "sherpa-onnx" | 17 | private const val TAG = "sherpa-onnx" |
| 19 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 | 18 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 |
| 20 | 19 | ||
| 20 | +// To enable microphone in android emulator, use | ||
| 21 | +// | ||
| 22 | +// adb emu avd hostmicon | ||
| 23 | + | ||
| 21 | class MainActivity : AppCompatActivity() { | 24 | class MainActivity : AppCompatActivity() { |
| 22 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) | 25 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) |
| 23 | 26 | ||
| 24 | - private lateinit var model: SherpaOnnx | 27 | + private lateinit var recognizer: OnlineRecognizer |
| 25 | private var audioRecord: AudioRecord? = null | 28 | private var audioRecord: AudioRecord? = null |
| 26 | private lateinit var recordButton: Button | 29 | private lateinit var recordButton: Button |
| 27 | private lateinit var textView: TextView | 30 | private lateinit var textView: TextView |
| @@ -87,7 +90,6 @@ class MainActivity : AppCompatActivity() { | @@ -87,7 +90,6 @@ class MainActivity : AppCompatActivity() { | ||
| 87 | audioRecord!!.startRecording() | 90 | audioRecord!!.startRecording() |
| 88 | recordButton.setText(R.string.stop) | 91 | recordButton.setText(R.string.stop) |
| 89 | isRecording = true | 92 | isRecording = true |
| 90 | - model.reset(true) | ||
| 91 | textView.text = "" | 93 | textView.text = "" |
| 92 | lastText = "" | 94 | lastText = "" |
| 93 | idx = 0 | 95 | idx = 0 |
| @@ -108,6 +110,7 @@ class MainActivity : AppCompatActivity() { | @@ -108,6 +110,7 @@ class MainActivity : AppCompatActivity() { | ||
| 108 | 110 | ||
| 109 | private fun processSamples() { | 111 | private fun processSamples() { |
| 110 | Log.i(TAG, "processing samples") | 112 | Log.i(TAG, "processing samples") |
| 113 | + val stream = recognizer.createStream() | ||
| 111 | 114 | ||
| 112 | val interval = 0.1 // i.e., 100 ms | 115 | val interval = 0.1 // i.e., 100 ms |
| 113 | val bufferSize = (interval * sampleRateInHz).toInt() // in samples | 116 | val bufferSize = (interval * sampleRateInHz).toInt() // in samples |
| @@ -117,29 +120,41 @@ class MainActivity : AppCompatActivity() { | @@ -117,29 +120,41 @@ class MainActivity : AppCompatActivity() { | ||
| 117 | val ret = audioRecord?.read(buffer, 0, buffer.size) | 120 | val ret = audioRecord?.read(buffer, 0, buffer.size) |
| 118 | if (ret != null && ret > 0) { | 121 | if (ret != null && ret > 0) { |
| 119 | val samples = FloatArray(ret) { buffer[it] / 32768.0f } | 122 | val samples = FloatArray(ret) { buffer[it] / 32768.0f } |
| 120 | - model.acceptWaveform(samples, sampleRate=sampleRateInHz) | ||
| 121 | - while (model.isReady()) { | ||
| 122 | - model.decode() | 123 | + stream.acceptWaveform(samples, sampleRate = sampleRateInHz) |
| 124 | + while (recognizer.isReady(stream)) { | ||
| 125 | + recognizer.decode(stream) | ||
| 123 | } | 126 | } |
| 124 | 127 | ||
| 125 | - val isEndpoint = model.isEndpoint() | ||
| 126 | - val text = model.text | 128 | + val isEndpoint = recognizer.isEndpoint(stream) |
| 129 | + var text = recognizer.getResult(stream).text | ||
| 127 | 130 | ||
| 128 | - var textToDisplay = lastText; | 131 | + // For streaming paraformer, we need to manually add some |
| 132 | + // paddings so that it has enough right context to | ||
| 133 | + // recognize the last word of this segment | ||
| 134 | + if (isEndpoint && recognizer.config.modelConfig.paraformer.encoder.isNotBlank()) { | ||
| 135 | + val tailPaddings = FloatArray((0.8 * sampleRateInHz).toInt()) | ||
| 136 | + stream.acceptWaveform(tailPaddings, sampleRate = sampleRateInHz) | ||
| 137 | + while (recognizer.isReady(stream)) { | ||
| 138 | + recognizer.decode(stream) | ||
| 139 | + } | ||
| 140 | + text = recognizer.getResult(stream).text | ||
| 141 | + } | ||
| 129 | 142 | ||
| 130 | - if(text.isNotBlank()) { | ||
| 131 | - if (lastText.isBlank()) { | ||
| 132 | - textToDisplay = "${idx}: ${text}" | 143 | + var textToDisplay = lastText |
| 144 | + | ||
| 145 | + if (text.isNotBlank()) { | ||
| 146 | + textToDisplay = if (lastText.isBlank()) { | ||
| 147 | + "${idx}: $text" | ||
| 133 | } else { | 148 | } else { |
| 134 | - textToDisplay = "${lastText}\n${idx}: ${text}" | 149 | + "${lastText}\n${idx}: $text" |
| 135 | } | 150 | } |
| 136 | } | 151 | } |
| 137 | 152 | ||
| 138 | if (isEndpoint) { | 153 | if (isEndpoint) { |
| 139 | - model.reset() | 154 | + recognizer.reset(stream) |
| 140 | if (text.isNotBlank()) { | 155 | if (text.isNotBlank()) { |
| 141 | - lastText = "${lastText}\n${idx}: ${text}" | ||
| 142 | - textToDisplay = lastText; | 156 | + lastText = "${lastText}\n${idx}: $text" |
| 157 | + textToDisplay = lastText | ||
| 143 | idx += 1 | 158 | idx += 1 |
| 144 | } | 159 | } |
| 145 | } | 160 | } |
| @@ -149,6 +164,7 @@ class MainActivity : AppCompatActivity() { | @@ -149,6 +164,7 @@ class MainActivity : AppCompatActivity() { | ||
| 149 | } | 164 | } |
| 150 | } | 165 | } |
| 151 | } | 166 | } |
| 167 | + stream.release() | ||
| 152 | } | 168 | } |
| 153 | 169 | ||
| 154 | private fun initMicrophone(): Boolean { | 170 | private fun initMicrophone(): Boolean { |
| @@ -180,7 +196,7 @@ class MainActivity : AppCompatActivity() { | @@ -180,7 +196,7 @@ class MainActivity : AppCompatActivity() { | ||
| 180 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | 196 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html |
| 181 | // for a list of available models | 197 | // for a list of available models |
| 182 | val type = 0 | 198 | val type = 0 |
| 183 | - println("Select model type ${type}") | 199 | + Log.i(TAG, "Select model type $type") |
| 184 | val config = OnlineRecognizerConfig( | 200 | val config = OnlineRecognizerConfig( |
| 185 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), | 201 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), |
| 186 | modelConfig = getModelConfig(type = type)!!, | 202 | modelConfig = getModelConfig(type = type)!!, |
| @@ -189,7 +205,7 @@ class MainActivity : AppCompatActivity() { | @@ -189,7 +205,7 @@ class MainActivity : AppCompatActivity() { | ||
| 189 | enableEndpoint = true, | 205 | enableEndpoint = true, |
| 190 | ) | 206 | ) |
| 191 | 207 | ||
| 192 | - model = SherpaOnnx( | 208 | + recognizer = OnlineRecognizer( |
| 193 | assetManager = application.assets, | 209 | assetManager = application.assets, |
| 194 | config = config, | 210 | config = config, |
| 195 | ) | 211 | ) |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/WaveReader.kt |
| @@ -16,6 +16,7 @@ | @@ -16,6 +16,7 @@ | ||
| 16 | tools:targetApi="31"> | 16 | tools:targetApi="31"> |
| 17 | <activity | 17 | <activity |
| 18 | android:name=".MainActivity" | 18 | android:name=".MainActivity" |
| 19 | + android:label="2pass ASR: Next-gen Kaldi" | ||
| 19 | android:exported="true"> | 20 | android:exported="true"> |
| 20 | <intent-filter> | 21 | <intent-filter> |
| 21 | <action android:name="android.intent.action.MAIN" /> | 22 | <action android:name="android.intent.action.MAIN" /> |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt |
| @@ -17,11 +17,13 @@ import kotlin.concurrent.thread | @@ -17,11 +17,13 @@ import kotlin.concurrent.thread | ||
| 17 | private const val TAG = "sherpa-onnx" | 17 | private const val TAG = "sherpa-onnx" |
| 18 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 | 18 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 |
| 19 | 19 | ||
| 20 | +// adb emu avd hostmicon | ||
| 21 | +// to enable microphone inside the emulator | ||
| 20 | class MainActivity : AppCompatActivity() { | 22 | class MainActivity : AppCompatActivity() { |
| 21 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) | 23 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) |
| 22 | 24 | ||
| 23 | - private lateinit var onlineRecognizer: SherpaOnnx | ||
| 24 | - private lateinit var offlineRecognizer: SherpaOnnxOffline | 25 | + private lateinit var onlineRecognizer: OnlineRecognizer |
| 26 | + private lateinit var offlineRecognizer: OfflineRecognizer | ||
| 25 | private var audioRecord: AudioRecord? = null | 27 | private var audioRecord: AudioRecord? = null |
| 26 | private lateinit var recordButton: Button | 28 | private lateinit var recordButton: Button |
| 27 | private lateinit var textView: TextView | 29 | private lateinit var textView: TextView |
| @@ -93,7 +95,6 @@ class MainActivity : AppCompatActivity() { | @@ -93,7 +95,6 @@ class MainActivity : AppCompatActivity() { | ||
| 93 | audioRecord!!.startRecording() | 95 | audioRecord!!.startRecording() |
| 94 | recordButton.setText(R.string.stop) | 96 | recordButton.setText(R.string.stop) |
| 95 | isRecording = true | 97 | isRecording = true |
| 96 | - onlineRecognizer.reset(true) | ||
| 97 | samplesBuffer.clear() | 98 | samplesBuffer.clear() |
| 98 | textView.text = "" | 99 | textView.text = "" |
| 99 | lastText = "" | 100 | lastText = "" |
| @@ -115,6 +116,7 @@ class MainActivity : AppCompatActivity() { | @@ -115,6 +116,7 @@ class MainActivity : AppCompatActivity() { | ||
| 115 | 116 | ||
| 116 | private fun processSamples() { | 117 | private fun processSamples() { |
| 117 | Log.i(TAG, "processing samples") | 118 | Log.i(TAG, "processing samples") |
| 119 | + val stream = onlineRecognizer.createStream() | ||
| 118 | 120 | ||
| 119 | val interval = 0.1 // i.e., 100 ms | 121 | val interval = 0.1 // i.e., 100 ms |
| 120 | val bufferSize = (interval * sampleRateInHz).toInt() // in samples | 122 | val bufferSize = (interval * sampleRateInHz).toInt() // in samples |
| @@ -126,29 +128,29 @@ class MainActivity : AppCompatActivity() { | @@ -126,29 +128,29 @@ class MainActivity : AppCompatActivity() { | ||
| 126 | val samples = FloatArray(ret) { buffer[it] / 32768.0f } | 128 | val samples = FloatArray(ret) { buffer[it] / 32768.0f } |
| 127 | samplesBuffer.add(samples) | 129 | samplesBuffer.add(samples) |
| 128 | 130 | ||
| 129 | - onlineRecognizer.acceptWaveform(samples, sampleRate = sampleRateInHz) | ||
| 130 | - while (onlineRecognizer.isReady()) { | ||
| 131 | - onlineRecognizer.decode() | 131 | + stream.acceptWaveform(samples, sampleRate = sampleRateInHz) |
| 132 | + while (onlineRecognizer.isReady(stream)) { | ||
| 133 | + onlineRecognizer.decode(stream) | ||
| 132 | } | 134 | } |
| 133 | - val isEndpoint = onlineRecognizer.isEndpoint() | 135 | + val isEndpoint = onlineRecognizer.isEndpoint(stream) |
| 134 | var textToDisplay = lastText | 136 | var textToDisplay = lastText |
| 135 | 137 | ||
| 136 | - var text = onlineRecognizer.text | 138 | + var text = onlineRecognizer.getResult(stream).text |
| 137 | if (text.isNotBlank()) { | 139 | if (text.isNotBlank()) { |
| 138 | - if (lastText.isBlank()) { | 140 | + textToDisplay = if (lastText.isBlank()) { |
| 139 | // textView.text = "${idx}: ${text}" | 141 | // textView.text = "${idx}: ${text}" |
| 140 | - textToDisplay = "${idx}: ${text}" | 142 | + "${idx}: $text" |
| 141 | } else { | 143 | } else { |
| 142 | - textToDisplay = "${lastText}\n${idx}: ${text}" | 144 | + "${lastText}\n${idx}: $text" |
| 143 | } | 145 | } |
| 144 | } | 146 | } |
| 145 | 147 | ||
| 146 | if (isEndpoint) { | 148 | if (isEndpoint) { |
| 147 | - onlineRecognizer.reset() | 149 | + onlineRecognizer.reset(stream) |
| 148 | 150 | ||
| 149 | if (text.isNotBlank()) { | 151 | if (text.isNotBlank()) { |
| 150 | text = runSecondPass() | 152 | text = runSecondPass() |
| 151 | - lastText = "${lastText}\n${idx}: ${text}" | 153 | + lastText = "${lastText}\n${idx}: $text" |
| 152 | idx += 1 | 154 | idx += 1 |
| 153 | } else { | 155 | } else { |
| 154 | samplesBuffer.clear() | 156 | samplesBuffer.clear() |
| @@ -160,6 +162,7 @@ class MainActivity : AppCompatActivity() { | @@ -160,6 +162,7 @@ class MainActivity : AppCompatActivity() { | ||
| 160 | } | 162 | } |
| 161 | } | 163 | } |
| 162 | } | 164 | } |
| 165 | + stream.release() | ||
| 163 | } | 166 | } |
| 164 | 167 | ||
| 165 | private fun initMicrophone(): Boolean { | 168 | private fun initMicrophone(): Boolean { |
| @@ -190,8 +193,8 @@ class MainActivity : AppCompatActivity() { | @@ -190,8 +193,8 @@ class MainActivity : AppCompatActivity() { | ||
| 190 | // Please change getModelConfig() to add new models | 193 | // Please change getModelConfig() to add new models |
| 191 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | 194 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html |
| 192 | // for a list of available models | 195 | // for a list of available models |
| 193 | - val firstType = 1 | ||
| 194 | - println("Select model type ${firstType} for the first pass") | 196 | + val firstType = 9 |
| 197 | + Log.i(TAG, "Select model type $firstType for the first pass") | ||
| 195 | val config = OnlineRecognizerConfig( | 198 | val config = OnlineRecognizerConfig( |
| 196 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), | 199 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), |
| 197 | modelConfig = getModelConfig(type = firstType)!!, | 200 | modelConfig = getModelConfig(type = firstType)!!, |
| @@ -199,7 +202,7 @@ class MainActivity : AppCompatActivity() { | @@ -199,7 +202,7 @@ class MainActivity : AppCompatActivity() { | ||
| 199 | enableEndpoint = true, | 202 | enableEndpoint = true, |
| 200 | ) | 203 | ) |
| 201 | 204 | ||
| 202 | - onlineRecognizer = SherpaOnnx( | 205 | + onlineRecognizer = OnlineRecognizer( |
| 203 | assetManager = application.assets, | 206 | assetManager = application.assets, |
| 204 | config = config, | 207 | config = config, |
| 205 | ) | 208 | ) |
| @@ -209,15 +212,15 @@ class MainActivity : AppCompatActivity() { | @@ -209,15 +212,15 @@ class MainActivity : AppCompatActivity() { | ||
| 209 | // Please change getOfflineModelConfig() to add new models | 212 | // Please change getOfflineModelConfig() to add new models |
| 210 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | 213 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html |
| 211 | // for a list of available models | 214 | // for a list of available models |
| 212 | - val secondType = 1 | ||
| 213 | - println("Select model type ${secondType} for the second pass") | 215 | + val secondType = 0 |
| 216 | + Log.i(TAG, "Select model type $secondType for the second pass") | ||
| 214 | 217 | ||
| 215 | val config = OfflineRecognizerConfig( | 218 | val config = OfflineRecognizerConfig( |
| 216 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), | 219 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), |
| 217 | modelConfig = getOfflineModelConfig(type = secondType)!!, | 220 | modelConfig = getOfflineModelConfig(type = secondType)!!, |
| 218 | ) | 221 | ) |
| 219 | 222 | ||
| 220 | - offlineRecognizer = SherpaOnnxOffline( | 223 | + offlineRecognizer = OfflineRecognizer( |
| 221 | assetManager = application.assets, | 224 | assetManager = application.assets, |
| 222 | config = config, | 225 | config = config, |
| 223 | ) | 226 | ) |
| @@ -244,8 +247,15 @@ class MainActivity : AppCompatActivity() { | @@ -244,8 +247,15 @@ class MainActivity : AppCompatActivity() { | ||
| 244 | val n = maxOf(0, samples.size - 8000) | 247 | val n = maxOf(0, samples.size - 8000) |
| 245 | 248 | ||
| 246 | samplesBuffer.clear() | 249 | samplesBuffer.clear() |
| 247 | - samplesBuffer.add(samples.sliceArray(n..samples.size-1)) | 250 | + samplesBuffer.add(samples.sliceArray(n until samples.size)) |
| 248 | 251 | ||
| 249 | - return offlineRecognizer.decode(samples.sliceArray(0..n), sampleRateInHz) | 252 | + val stream = offlineRecognizer.createStream() |
| 253 | + stream.acceptWaveform(samples.sliceArray(0..n), sampleRateInHz) | ||
| 254 | + offlineRecognizer.decode(stream) | ||
| 255 | + val result = offlineRecognizer.getResult(stream) | ||
| 256 | + | ||
| 257 | + stream.release() | ||
| 258 | + | ||
| 259 | + return result.text | ||
| 250 | } | 260 | } |
| 251 | } | 261 | } |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt |
| 1 | -package com.k2fsa.sherpa.onnx | ||
| 2 | - | ||
| 3 | -import android.content.res.AssetManager | ||
| 4 | - | ||
| 5 | -data class EndpointRule( | ||
| 6 | - var mustContainNonSilence: Boolean, | ||
| 7 | - var minTrailingSilence: Float, | ||
| 8 | - var minUtteranceLength: Float, | ||
| 9 | -) | ||
| 10 | - | ||
| 11 | -data class EndpointConfig( | ||
| 12 | - var rule1: EndpointRule = EndpointRule(false, 2.0f, 0.0f), | ||
| 13 | - var rule2: EndpointRule = EndpointRule(true, 1.2f, 0.0f), | ||
| 14 | - var rule3: EndpointRule = EndpointRule(false, 0.0f, 20.0f) | ||
| 15 | -) | ||
| 16 | - | ||
| 17 | -data class OnlineTransducerModelConfig( | ||
| 18 | - var encoder: String = "", | ||
| 19 | - var decoder: String = "", | ||
| 20 | - var joiner: String = "", | ||
| 21 | -) | ||
| 22 | - | ||
| 23 | -data class OnlineParaformerModelConfig( | ||
| 24 | - var encoder: String = "", | ||
| 25 | - var decoder: String = "", | ||
| 26 | -) | ||
| 27 | - | ||
| 28 | -data class OnlineZipformer2CtcModelConfig( | ||
| 29 | - var model: String = "", | ||
| 30 | -) | ||
| 31 | - | ||
| 32 | -data class OnlineModelConfig( | ||
| 33 | - var transducer: OnlineTransducerModelConfig = OnlineTransducerModelConfig(), | ||
| 34 | - var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(), | ||
| 35 | - var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(), | ||
| 36 | - var tokens: String, | ||
| 37 | - var numThreads: Int = 1, | ||
| 38 | - var debug: Boolean = false, | ||
| 39 | - var provider: String = "cpu", | ||
| 40 | - var modelType: String = "", | ||
| 41 | -) | ||
| 42 | - | ||
| 43 | -data class OnlineLMConfig( | ||
| 44 | - var model: String = "", | ||
| 45 | - var scale: Float = 0.5f, | ||
| 46 | -) | ||
| 47 | - | ||
| 48 | -data class FeatureConfig( | ||
| 49 | - var sampleRate: Int = 16000, | ||
| 50 | - var featureDim: Int = 80, | ||
| 51 | -) | ||
| 52 | - | ||
| 53 | -data class OnlineRecognizerConfig( | ||
| 54 | - var featConfig: FeatureConfig = FeatureConfig(), | ||
| 55 | - var modelConfig: OnlineModelConfig, | ||
| 56 | - var lmConfig: OnlineLMConfig = OnlineLMConfig(), | ||
| 57 | - var endpointConfig: EndpointConfig = EndpointConfig(), | ||
| 58 | - var enableEndpoint: Boolean = true, | ||
| 59 | - var decodingMethod: String = "greedy_search", | ||
| 60 | - var maxActivePaths: Int = 4, | ||
| 61 | - var hotwordsFile: String = "", | ||
| 62 | - var hotwordsScore: Float = 1.5f, | ||
| 63 | -) | ||
| 64 | - | ||
| 65 | -data class OfflineTransducerModelConfig( | ||
| 66 | - var encoder: String = "", | ||
| 67 | - var decoder: String = "", | ||
| 68 | - var joiner: String = "", | ||
| 69 | -) | ||
| 70 | - | ||
| 71 | -data class OfflineParaformerModelConfig( | ||
| 72 | - var model: String = "", | ||
| 73 | -) | ||
| 74 | - | ||
| 75 | -data class OfflineWhisperModelConfig( | ||
| 76 | - var encoder: String = "", | ||
| 77 | - var decoder: String = "", | ||
| 78 | - var language: String = "en", // Used with multilingual model | ||
| 79 | - var task: String = "transcribe", // transcribe or translate | ||
| 80 | - var tailPaddings: Int = 1000, // Padding added at the end of the samples | ||
| 81 | -) | ||
| 82 | - | ||
| 83 | -data class OfflineModelConfig( | ||
| 84 | - var transducer: OfflineTransducerModelConfig = OfflineTransducerModelConfig(), | ||
| 85 | - var paraformer: OfflineParaformerModelConfig = OfflineParaformerModelConfig(), | ||
| 86 | - var whisper: OfflineWhisperModelConfig = OfflineWhisperModelConfig(), | ||
| 87 | - var numThreads: Int = 1, | ||
| 88 | - var debug: Boolean = false, | ||
| 89 | - var provider: String = "cpu", | ||
| 90 | - var modelType: String = "", | ||
| 91 | - var tokens: String, | ||
| 92 | -) | ||
| 93 | - | ||
| 94 | -data class OfflineRecognizerConfig( | ||
| 95 | - var featConfig: FeatureConfig = FeatureConfig(), | ||
| 96 | - var modelConfig: OfflineModelConfig, | ||
| 97 | - // var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it | ||
| 98 | - var decodingMethod: String = "greedy_search", | ||
| 99 | - var maxActivePaths: Int = 4, | ||
| 100 | - var hotwordsFile: String = "", | ||
| 101 | - var hotwordsScore: Float = 1.5f, | ||
| 102 | -) | ||
| 103 | - | ||
| 104 | -class SherpaOnnx( | ||
| 105 | - assetManager: AssetManager? = null, | ||
| 106 | - var config: OnlineRecognizerConfig, | ||
| 107 | -) { | ||
| 108 | - private val ptr: Long | ||
| 109 | - | ||
| 110 | - init { | ||
| 111 | - if (assetManager != null) { | ||
| 112 | - ptr = new(assetManager, config) | ||
| 113 | - } else { | ||
| 114 | - ptr = newFromFile(config) | ||
| 115 | - } | ||
| 116 | - } | ||
| 117 | - | ||
| 118 | - protected fun finalize() { | ||
| 119 | - delete(ptr) | ||
| 120 | - } | ||
| 121 | - | ||
| 122 | - fun acceptWaveform(samples: FloatArray, sampleRate: Int) = | ||
| 123 | - acceptWaveform(ptr, samples, sampleRate) | ||
| 124 | - | ||
| 125 | - fun inputFinished() = inputFinished(ptr) | ||
| 126 | - fun reset(recreate: Boolean = false, hotwords: String = "") = reset(ptr, recreate, hotwords) | ||
| 127 | - fun decode() = decode(ptr) | ||
| 128 | - fun isEndpoint(): Boolean = isEndpoint(ptr) | ||
| 129 | - fun isReady(): Boolean = isReady(ptr) | ||
| 130 | - | ||
| 131 | - val text: String | ||
| 132 | - get() = getText(ptr) | ||
| 133 | - | ||
| 134 | - val tokens: Array<String> | ||
| 135 | - get() = getTokens(ptr) | ||
| 136 | - | ||
| 137 | - private external fun delete(ptr: Long) | ||
| 138 | - | ||
| 139 | - private external fun new( | ||
| 140 | - assetManager: AssetManager, | ||
| 141 | - config: OnlineRecognizerConfig, | ||
| 142 | - ): Long | ||
| 143 | - | ||
| 144 | - private external fun newFromFile( | ||
| 145 | - config: OnlineRecognizerConfig, | ||
| 146 | - ): Long | ||
| 147 | - | ||
| 148 | - private external fun acceptWaveform(ptr: Long, samples: FloatArray, sampleRate: Int) | ||
| 149 | - private external fun inputFinished(ptr: Long) | ||
| 150 | - private external fun getText(ptr: Long): String | ||
| 151 | - private external fun reset(ptr: Long, recreate: Boolean, hotwords: String) | ||
| 152 | - private external fun decode(ptr: Long) | ||
| 153 | - private external fun isEndpoint(ptr: Long): Boolean | ||
| 154 | - private external fun isReady(ptr: Long): Boolean | ||
| 155 | - private external fun getTokens(ptr: Long): Array<String> | ||
| 156 | - | ||
| 157 | - companion object { | ||
| 158 | - init { | ||
| 159 | - System.loadLibrary("sherpa-onnx-jni") | ||
| 160 | - } | ||
| 161 | - } | ||
| 162 | -} | ||
| 163 | - | ||
| 164 | -class SherpaOnnxOffline( | ||
| 165 | - assetManager: AssetManager? = null, | ||
| 166 | - var config: OfflineRecognizerConfig, | ||
| 167 | -) { | ||
| 168 | - private val ptr: Long | ||
| 169 | - | ||
| 170 | - init { | ||
| 171 | - if (assetManager != null) { | ||
| 172 | - ptr = new(assetManager, config) | ||
| 173 | - } else { | ||
| 174 | - ptr = newFromFile(config) | ||
| 175 | - } | ||
| 176 | - } | ||
| 177 | - | ||
| 178 | - protected fun finalize() { | ||
| 179 | - delete(ptr) | ||
| 180 | - } | ||
| 181 | - | ||
| 182 | - fun decode(samples: FloatArray, sampleRate: Int) = decode(ptr, samples, sampleRate) | ||
| 183 | - | ||
| 184 | - private external fun delete(ptr: Long) | ||
| 185 | - | ||
| 186 | - private external fun new( | ||
| 187 | - assetManager: AssetManager, | ||
| 188 | - config: OfflineRecognizerConfig, | ||
| 189 | - ): Long | ||
| 190 | - | ||
| 191 | - private external fun newFromFile( | ||
| 192 | - config: OfflineRecognizerConfig, | ||
| 193 | - ): Long | ||
| 194 | - | ||
| 195 | - private external fun decode(ptr: Long, samples: FloatArray, sampleRate: Int): String | ||
| 196 | - | ||
| 197 | - companion object { | ||
| 198 | - init { | ||
| 199 | - System.loadLibrary("sherpa-onnx-jni") | ||
| 200 | - } | ||
| 201 | - } | ||
| 202 | -} | ||
| 203 | - | ||
| 204 | -fun getFeatureConfig(sampleRate: Int, featureDim: Int): FeatureConfig { | ||
| 205 | - return FeatureConfig(sampleRate = sampleRate, featureDim = featureDim) | ||
| 206 | -} | ||
| 207 | - | ||
| 208 | -/* | ||
| 209 | -Please see | ||
| 210 | -https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 211 | -for a list of pre-trained models. | ||
| 212 | - | ||
| 213 | -We only add a few here. Please change the following code | ||
| 214 | -to add your own. (It should be straightforward to add a new model | ||
| 215 | -by following the code) | ||
| 216 | - | ||
| 217 | -@param type | ||
| 218 | -0 - csukuangfj/sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23 (Chinese) | ||
| 219 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23 | ||
| 220 | - encoder/joiner int8, decoder float32 | ||
| 221 | - | ||
| 222 | -1 - csukuangfj/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17 (English) | ||
| 223 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english | ||
| 224 | - encoder/joiner int8, decoder fp32 | ||
| 225 | - | ||
| 226 | - */ | ||
| 227 | -fun getModelConfig(type: Int): OnlineModelConfig? { | ||
| 228 | - when (type) { | ||
| 229 | - 0 -> { | ||
| 230 | - val modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23" | ||
| 231 | - return OnlineModelConfig( | ||
| 232 | - transducer = OnlineTransducerModelConfig( | ||
| 233 | - encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", | ||
| 234 | - decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", | ||
| 235 | - joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", | ||
| 236 | - ), | ||
| 237 | - tokens = "$modelDir/tokens.txt", | ||
| 238 | - modelType = "zipformer", | ||
| 239 | - ) | ||
| 240 | - } | ||
| 241 | - | ||
| 242 | - 1 -> { | ||
| 243 | - val modelDir = "sherpa-onnx-streaming-zipformer-en-20M-2023-02-17" | ||
| 244 | - return OnlineModelConfig( | ||
| 245 | - transducer = OnlineTransducerModelConfig( | ||
| 246 | - encoder = "$modelDir/encoder-epoch-99-avg-1.int8.onnx", | ||
| 247 | - decoder = "$modelDir/decoder-epoch-99-avg-1.onnx", | ||
| 248 | - joiner = "$modelDir/joiner-epoch-99-avg-1.int8.onnx", | ||
| 249 | - ), | ||
| 250 | - tokens = "$modelDir/tokens.txt", | ||
| 251 | - modelType = "zipformer", | ||
| 252 | - ) | ||
| 253 | - } | ||
| 254 | - } | ||
| 255 | - return null | ||
| 256 | -} | ||
| 257 | - | ||
| 258 | -/* | ||
| 259 | -Please see | ||
| 260 | -https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 261 | -for a list of pre-trained models. | ||
| 262 | - | ||
| 263 | -We only add a few here. Please change the following code | ||
| 264 | -to add your own LM model. (It should be straightforward to train a new NN LM model | ||
| 265 | -by following the code, https://github.com/k2-fsa/icefall/blob/master/icefall/rnn_lm/train.py) | ||
| 266 | - | ||
| 267 | -@param type | ||
| 268 | -0 - sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English) | ||
| 269 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english | ||
| 270 | - */ | ||
| 271 | -fun getOnlineLMConfig(type: Int): OnlineLMConfig { | ||
| 272 | - when (type) { | ||
| 273 | - 0 -> { | ||
| 274 | - val modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20" | ||
| 275 | - return OnlineLMConfig( | ||
| 276 | - model = "$modelDir/with-state-epoch-99-avg-1.int8.onnx", | ||
| 277 | - scale = 0.5f, | ||
| 278 | - ) | ||
| 279 | - } | ||
| 280 | - } | ||
| 281 | - return OnlineLMConfig() | ||
| 282 | -} | ||
| 283 | - | ||
| 284 | -// for English models, use a small value for rule2.minTrailingSilence, e.g., 0.8 | ||
| 285 | -fun getEndpointConfig(): EndpointConfig { | ||
| 286 | - return EndpointConfig( | ||
| 287 | - rule1 = EndpointRule(false, 2.4f, 0.0f), | ||
| 288 | - rule2 = EndpointRule(true, 0.8f, 0.0f), | ||
| 289 | - rule3 = EndpointRule(false, 0.0f, 20.0f) | ||
| 290 | - ) | ||
| 291 | -} | ||
| 292 | - | ||
| 293 | -/* | ||
| 294 | -Please see | ||
| 295 | -https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 296 | -for a list of pre-trained models. | ||
| 297 | - | ||
| 298 | -We only add a few here. Please change the following code | ||
| 299 | -to add your own. (It should be straightforward to add a new model | ||
| 300 | -by following the code) | ||
| 301 | - | ||
| 302 | -@param type | ||
| 303 | - | ||
| 304 | -0 - csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 (Chinese) | ||
| 305 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese | ||
| 306 | - int8 | ||
| 307 | - | ||
| 308 | -1 - icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04 (English) | ||
| 309 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#icefall-asr-multidataset-pruned-transducer-stateless7-2023-05-04-english | ||
| 310 | - encoder int8, decoder/joiner float32 | ||
| 311 | - | ||
| 312 | -2 - sherpa-onnx-whisper-tiny.en | ||
| 313 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en | ||
| 314 | - encoder int8, decoder int8 | ||
| 315 | - | ||
| 316 | -3 - sherpa-onnx-whisper-base.en | ||
| 317 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html#tiny-en | ||
| 318 | - encoder int8, decoder int8 | ||
| 319 | - | ||
| 320 | -4 - pkufool/icefall-asr-zipformer-wenetspeech-20230615 (Chinese) | ||
| 321 | - https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-wenetspeech-20230615-chinese | ||
| 322 | - encoder/joiner int8, decoder fp32 | ||
| 323 | - | ||
| 324 | - */ | ||
| 325 | -fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 326 | - when (type) { | ||
| 327 | - 0 -> { | ||
| 328 | - val modelDir = "sherpa-onnx-paraformer-zh-2023-03-28" | ||
| 329 | - return OfflineModelConfig( | ||
| 330 | - paraformer = OfflineParaformerModelConfig( | ||
| 331 | - model = "$modelDir/model.int8.onnx", | ||
| 332 | - ), | ||
| 333 | - tokens = "$modelDir/tokens.txt", | ||
| 334 | - modelType = "paraformer", | ||
| 335 | - ) | ||
| 336 | - } | ||
| 337 | - | ||
| 338 | - 1 -> { | ||
| 339 | - val modelDir = "icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04" | ||
| 340 | - return OfflineModelConfig( | ||
| 341 | - transducer = OfflineTransducerModelConfig( | ||
| 342 | - encoder = "$modelDir/encoder-epoch-30-avg-4.int8.onnx", | ||
| 343 | - decoder = "$modelDir/decoder-epoch-30-avg-4.onnx", | ||
| 344 | - joiner = "$modelDir/joiner-epoch-30-avg-4.onnx", | ||
| 345 | - ), | ||
| 346 | - tokens = "$modelDir/tokens.txt", | ||
| 347 | - modelType = "zipformer", | ||
| 348 | - ) | ||
| 349 | - } | ||
| 350 | - | ||
| 351 | - 2 -> { | ||
| 352 | - val modelDir = "sherpa-onnx-whisper-tiny.en" | ||
| 353 | - return OfflineModelConfig( | ||
| 354 | - whisper = OfflineWhisperModelConfig( | ||
| 355 | - encoder = "$modelDir/tiny.en-encoder.int8.onnx", | ||
| 356 | - decoder = "$modelDir/tiny.en-decoder.int8.onnx", | ||
| 357 | - ), | ||
| 358 | - tokens = "$modelDir/tiny.en-tokens.txt", | ||
| 359 | - modelType = "whisper", | ||
| 360 | - ) | ||
| 361 | - } | ||
| 362 | - | ||
| 363 | - 3 -> { | ||
| 364 | - val modelDir = "sherpa-onnx-whisper-base.en" | ||
| 365 | - return OfflineModelConfig( | ||
| 366 | - whisper = OfflineWhisperModelConfig( | ||
| 367 | - encoder = "$modelDir/base.en-encoder.int8.onnx", | ||
| 368 | - decoder = "$modelDir/base.en-decoder.int8.onnx", | ||
| 369 | - ), | ||
| 370 | - tokens = "$modelDir/base.en-tokens.txt", | ||
| 371 | - modelType = "whisper", | ||
| 372 | - ) | ||
| 373 | - } | ||
| 374 | - | ||
| 375 | - | ||
| 376 | - 4 -> { | ||
| 377 | - val modelDir = "icefall-asr-zipformer-wenetspeech-20230615" | ||
| 378 | - return OfflineModelConfig( | ||
| 379 | - transducer = OfflineTransducerModelConfig( | ||
| 380 | - encoder = "$modelDir/encoder-epoch-12-avg-4.int8.onnx", | ||
| 381 | - decoder = "$modelDir/decoder-epoch-12-avg-4.onnx", | ||
| 382 | - joiner = "$modelDir/joiner-epoch-12-avg-4.int8.onnx", | ||
| 383 | - ), | ||
| 384 | - tokens = "$modelDir/tokens.txt", | ||
| 385 | - modelType = "zipformer", | ||
| 386 | - ) | ||
| 387 | - } | ||
| 388 | - | ||
| 389 | - 5 -> { | ||
| 390 | - val modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2" | ||
| 391 | - return OfflineModelConfig( | ||
| 392 | - transducer = OfflineTransducerModelConfig( | ||
| 393 | - encoder = "$modelDir/encoder-epoch-20-avg-1.int8.onnx", | ||
| 394 | - decoder = "$modelDir/decoder-epoch-20-avg-1.onnx", | ||
| 395 | - joiner = "$modelDir/joiner-epoch-20-avg-1.int8.onnx", | ||
| 396 | - ), | ||
| 397 | - tokens = "$modelDir/tokens.txt", | ||
| 398 | - modelType = "zipformer2", | ||
| 399 | - ) | ||
| 400 | - } | ||
| 401 | - | ||
| 402 | - } | ||
| 403 | - return null | ||
| 404 | -} |
| 1 | -package com.k2fsa.sherpa.onnx | ||
| 2 | - | ||
| 3 | -import android.content.res.AssetManager | ||
| 4 | - | ||
| 5 | -class WaveReader { | ||
| 6 | - companion object { | ||
| 7 | - // Read a mono wave file asset | ||
| 8 | - // The returned array has two entries: | ||
| 9 | - // - the first entry contains an 1-D float array | ||
| 10 | - // - the second entry is the sample rate | ||
| 11 | - external fun readWaveFromAsset( | ||
| 12 | - assetManager: AssetManager, | ||
| 13 | - filename: String, | ||
| 14 | - ): Array<Any> | ||
| 15 | - | ||
| 16 | - // Read a mono wave file from disk | ||
| 17 | - // The returned array has two entries: | ||
| 18 | - // - the first entry contains an 1-D float array | ||
| 19 | - // - the second entry is the sample rate | ||
| 20 | - external fun readWaveFromFile( | ||
| 21 | - filename: String, | ||
| 22 | - ): Array<Any> | ||
| 23 | - | ||
| 24 | - init { | ||
| 25 | - System.loadLibrary("sherpa-onnx-jni") | ||
| 26 | - } | ||
| 27 | - } | ||
| 28 | -} |
android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/AudioTagging.kt
0 → 120000
| 1 | +../../../../../../../../../../../../sherpa-onnx/kotlin-api/AudioTagging.kt |
| @@ -46,7 +46,6 @@ import androidx.compose.ui.unit.dp | @@ -46,7 +46,6 @@ import androidx.compose.ui.unit.dp | ||
| 46 | import androidx.compose.ui.unit.sp | 46 | import androidx.compose.ui.unit.sp |
| 47 | import androidx.core.app.ActivityCompat | 47 | import androidx.core.app.ActivityCompat |
| 48 | import com.k2fsa.sherpa.onnx.AudioEvent | 48 | import com.k2fsa.sherpa.onnx.AudioEvent |
| 49 | -import com.k2fsa.sherpa.onnx.Tagger | ||
| 50 | import kotlin.concurrent.thread | 49 | import kotlin.concurrent.thread |
| 51 | 50 | ||
| 52 | 51 |
android/SherpaOnnxAudioTagging/app/src/main/java/com/k2fsa/sherpa/onnx/audio/tagging/MainActivity.kt
| @@ -13,13 +13,14 @@ import androidx.compose.material3.Surface | @@ -13,13 +13,14 @@ import androidx.compose.material3.Surface | ||
| 13 | import androidx.compose.runtime.Composable | 13 | import androidx.compose.runtime.Composable |
| 14 | import androidx.compose.ui.Modifier | 14 | import androidx.compose.ui.Modifier |
| 15 | import androidx.core.app.ActivityCompat | 15 | import androidx.core.app.ActivityCompat |
| 16 | -import com.k2fsa.sherpa.onnx.Tagger | ||
| 17 | import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme | 16 | import com.k2fsa.sherpa.onnx.audio.tagging.ui.theme.SherpaOnnxAudioTaggingTheme |
| 18 | 17 | ||
| 19 | const val TAG = "sherpa-onnx" | 18 | const val TAG = "sherpa-onnx" |
| 20 | 19 | ||
| 21 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 | 20 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 |
| 22 | 21 | ||
| 22 | +// adb emu avd hostmicon | ||
| 23 | +// to enable mic inside the emulator | ||
| 23 | class MainActivity : ComponentActivity() { | 24 | class MainActivity : ComponentActivity() { |
| 24 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) | 25 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) |
| 25 | override fun onCreate(savedInstanceState: Bundle?) { | 26 | override fun onCreate(savedInstanceState: Bundle?) { |
| 1 | +../../../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt |
| 1 | -package com.k2fsa.sherpa.onnx | 1 | +package com.k2fsa.sherpa.onnx.audio.tagging |
| 2 | 2 | ||
| 3 | import android.content.res.AssetManager | 3 | import android.content.res.AssetManager |
| 4 | import android.util.Log | 4 | import android.util.Log |
| 5 | +import com.k2fsa.sherpa.onnx.AudioTagging | ||
| 6 | +import com.k2fsa.sherpa.onnx.getAudioTaggingConfig | ||
| 5 | 7 | ||
| 6 | 8 | ||
| 7 | object Tagger { | 9 | object Tagger { |
| @@ -17,7 +19,7 @@ object Tagger { | @@ -17,7 +19,7 @@ object Tagger { | ||
| 17 | return | 19 | return |
| 18 | } | 20 | } |
| 19 | 21 | ||
| 20 | - Log.i(TAG, "Initializing audio tagger") | 22 | + Log.i("sherpa-onnx", "Initializing audio tagger") |
| 21 | val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!! | 23 | val config = getAudioTaggingConfig(type = 0, numThreads = numThreads)!! |
| 22 | _tagger = AudioTagging(assetManager, config) | 24 | _tagger = AudioTagging(assetManager, config) |
| 23 | } | 25 | } |
| @@ -33,7 +33,7 @@ import androidx.wear.compose.material.Button | @@ -33,7 +33,7 @@ import androidx.wear.compose.material.Button | ||
| 33 | import androidx.wear.compose.material.MaterialTheme | 33 | import androidx.wear.compose.material.MaterialTheme |
| 34 | import androidx.wear.compose.material.Text | 34 | import androidx.wear.compose.material.Text |
| 35 | import com.k2fsa.sherpa.onnx.AudioEvent | 35 | import com.k2fsa.sherpa.onnx.AudioEvent |
| 36 | -import com.k2fsa.sherpa.onnx.Tagger | 36 | +import com.k2fsa.sherpa.onnx.audio.tagging.Tagger |
| 37 | import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme | 37 | import com.k2fsa.sherpa.onnx.audio.tagging.wear.os.presentation.theme.SherpaOnnxAudioTaggingWearOsTheme |
| 38 | import kotlin.concurrent.thread | 38 | import kotlin.concurrent.thread |
| 39 | 39 |
| @@ -17,11 +17,14 @@ import androidx.activity.compose.setContent | @@ -17,11 +17,14 @@ import androidx.activity.compose.setContent | ||
| 17 | import androidx.compose.runtime.Composable | 17 | import androidx.compose.runtime.Composable |
| 18 | import androidx.core.app.ActivityCompat | 18 | import androidx.core.app.ActivityCompat |
| 19 | import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen | 19 | import androidx.core.splashscreen.SplashScreen.Companion.installSplashScreen |
| 20 | -import com.k2fsa.sherpa.onnx.Tagger | 20 | +import com.k2fsa.sherpa.onnx.audio.tagging.Tagger |
| 21 | 21 | ||
| 22 | const val TAG = "sherpa-onnx" | 22 | const val TAG = "sherpa-onnx" |
| 23 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 | 23 | private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 |
| 24 | 24 | ||
| 25 | +// adb emu avd hostmicon | ||
| 26 | +// to enable mic inside the emulator | ||
| 27 | + | ||
| 25 | class MainActivity : ComponentActivity() { | 28 | class MainActivity : ComponentActivity() { |
| 26 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) | 29 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) |
| 27 | override fun onCreate(savedInstanceState: Bundle?) { | 30 | override fun onCreate(savedInstanceState: Bundle?) { |
| @@ -15,7 +15,8 @@ | @@ -15,7 +15,8 @@ | ||
| 15 | android:theme="@style/Theme.SherpaOnnx" | 15 | android:theme="@style/Theme.SherpaOnnx" |
| 16 | tools:targetApi="31"> | 16 | tools:targetApi="31"> |
| 17 | <activity | 17 | <activity |
| 18 | - android:name=".MainActivity" | 18 | + android:name=".kws.MainActivity" |
| 19 | + android:label="Keyword-spotter" | ||
| 19 | android:exported="true"> | 20 | android:exported="true"> |
| 20 | <intent-filter> | 21 | <intent-filter> |
| 21 | <action android:name="android.intent.action.MAIN" /> | 22 | <action android:name="android.intent.action.MAIN" /> |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/KeywordSpotter.kt |
| 1 | -package com.k2fsa.sherpa.onnx | 1 | +package com.k2fsa.sherpa.onnx.kws |
| 2 | 2 | ||
| 3 | import android.Manifest | 3 | import android.Manifest |
| 4 | import android.content.pm.PackageManager | 4 | import android.content.pm.PackageManager |
| @@ -14,7 +14,13 @@ import android.widget.TextView | @@ -14,7 +14,13 @@ import android.widget.TextView | ||
| 14 | import android.widget.Toast | 14 | import android.widget.Toast |
| 15 | import androidx.appcompat.app.AppCompatActivity | 15 | import androidx.appcompat.app.AppCompatActivity |
| 16 | import androidx.core.app.ActivityCompat | 16 | import androidx.core.app.ActivityCompat |
| 17 | -import com.k2fsa.sherpa.onnx.* | 17 | +import com.k2fsa.sherpa.onnx.KeywordSpotter |
| 18 | +import com.k2fsa.sherpa.onnx.KeywordSpotterConfig | ||
| 19 | +import com.k2fsa.sherpa.onnx.OnlineStream | ||
| 20 | +import com.k2fsa.sherpa.onnx.R | ||
| 21 | +import com.k2fsa.sherpa.onnx.getFeatureConfig | ||
| 22 | +import com.k2fsa.sherpa.onnx.getKeywordsFile | ||
| 23 | +import com.k2fsa.sherpa.onnx.getKwsModelConfig | ||
| 18 | import kotlin.concurrent.thread | 24 | import kotlin.concurrent.thread |
| 19 | 25 | ||
| 20 | private const val TAG = "sherpa-onnx" | 26 | private const val TAG = "sherpa-onnx" |
| @@ -23,7 +29,8 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 | @@ -23,7 +29,8 @@ private const val REQUEST_RECORD_AUDIO_PERMISSION = 200 | ||
| 23 | class MainActivity : AppCompatActivity() { | 29 | class MainActivity : AppCompatActivity() { |
| 24 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) | 30 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) |
| 25 | 31 | ||
| 26 | - private lateinit var model: SherpaOnnxKws | 32 | + private lateinit var kws: KeywordSpotter |
| 33 | + private lateinit var stream: OnlineStream | ||
| 27 | private var audioRecord: AudioRecord? = null | 34 | private var audioRecord: AudioRecord? = null |
| 28 | private lateinit var recordButton: Button | 35 | private lateinit var recordButton: Button |
| 29 | private lateinit var textView: TextView | 36 | private lateinit var textView: TextView |
| @@ -87,15 +94,18 @@ class MainActivity : AppCompatActivity() { | @@ -87,15 +94,18 @@ class MainActivity : AppCompatActivity() { | ||
| 87 | 94 | ||
| 88 | Log.i(TAG, keywords) | 95 | Log.i(TAG, keywords) |
| 89 | keywords = keywords.replace("\n", "/") | 96 | keywords = keywords.replace("\n", "/") |
| 97 | + keywords = keywords.trim() | ||
| 90 | // If keywords is an empty string, it just resets the decoding stream | 98 | // If keywords is an empty string, it just resets the decoding stream |
| 91 | // always returns true in this case. | 99 | // always returns true in this case. |
| 92 | // If keywords is not empty, it will create a new decoding stream with | 100 | // If keywords is not empty, it will create a new decoding stream with |
| 93 | // the given keywords appended to the default keywords. | 101 | // the given keywords appended to the default keywords. |
| 94 | - // Return false if errors occured when adding keywords, true otherwise. | ||
| 95 | - val status = model.reset(keywords) | ||
| 96 | - if (!status) { | ||
| 97 | - Log.i(TAG, "Failed to reset with keywords.") | ||
| 98 | - Toast.makeText(this, "Failed to set keywords.", Toast.LENGTH_LONG).show(); | 102 | + // Return false if errors occurred when adding keywords, true otherwise. |
| 103 | + stream.release() | ||
| 104 | + stream = kws.createStream(keywords) | ||
| 105 | + if (stream.ptr == 0L) { | ||
| 106 | + Log.i(TAG, "Failed to create stream with keywords: $keywords") | ||
| 107 | + Toast.makeText(this, "Failed to set keywords to $keywords.", Toast.LENGTH_LONG) | ||
| 108 | + .show() | ||
| 99 | return | 109 | return |
| 100 | } | 110 | } |
| 101 | 111 | ||
| @@ -122,6 +132,7 @@ class MainActivity : AppCompatActivity() { | @@ -122,6 +132,7 @@ class MainActivity : AppCompatActivity() { | ||
| 122 | audioRecord!!.release() | 132 | audioRecord!!.release() |
| 123 | audioRecord = null | 133 | audioRecord = null |
| 124 | recordButton.setText(R.string.start) | 134 | recordButton.setText(R.string.start) |
| 135 | + stream.release() | ||
| 125 | Log.i(TAG, "Stopped recording") | 136 | Log.i(TAG, "Stopped recording") |
| 126 | } | 137 | } |
| 127 | } | 138 | } |
| @@ -137,22 +148,22 @@ class MainActivity : AppCompatActivity() { | @@ -137,22 +148,22 @@ class MainActivity : AppCompatActivity() { | ||
| 137 | val ret = audioRecord?.read(buffer, 0, buffer.size) | 148 | val ret = audioRecord?.read(buffer, 0, buffer.size) |
| 138 | if (ret != null && ret > 0) { | 149 | if (ret != null && ret > 0) { |
| 139 | val samples = FloatArray(ret) { buffer[it] / 32768.0f } | 150 | val samples = FloatArray(ret) { buffer[it] / 32768.0f } |
| 140 | - model.acceptWaveform(samples, sampleRate=sampleRateInHz) | ||
| 141 | - while (model.isReady()) { | ||
| 142 | - model.decode() | 151 | + stream.acceptWaveform(samples, sampleRate = sampleRateInHz) |
| 152 | + while (kws.isReady(stream)) { | ||
| 153 | + kws.decode(stream) | ||
| 143 | } | 154 | } |
| 144 | 155 | ||
| 145 | - val text = model.keyword | 156 | + val text = kws.getResult(stream).keyword |
| 146 | 157 | ||
| 147 | - var textToDisplay = lastText; | 158 | + var textToDisplay = lastText |
| 148 | 159 | ||
| 149 | - if(text.isNotBlank()) { | 160 | + if (text.isNotBlank()) { |
| 150 | if (lastText.isBlank()) { | 161 | if (lastText.isBlank()) { |
| 151 | - textToDisplay = "${idx}: ${text}" | 162 | + textToDisplay = "$idx: $text" |
| 152 | } else { | 163 | } else { |
| 153 | - textToDisplay = "${idx}: ${text}\n${lastText}" | 164 | + textToDisplay = "$idx: $text\n$lastText" |
| 154 | } | 165 | } |
| 155 | - lastText = "${idx}: ${text}\n${lastText}" | 166 | + lastText = "$idx: $text\n$lastText" |
| 156 | idx += 1 | 167 | idx += 1 |
| 157 | } | 168 | } |
| 158 | 169 | ||
| @@ -188,20 +199,21 @@ class MainActivity : AppCompatActivity() { | @@ -188,20 +199,21 @@ class MainActivity : AppCompatActivity() { | ||
| 188 | } | 199 | } |
| 189 | 200 | ||
| 190 | private fun initModel() { | 201 | private fun initModel() { |
| 191 | - // Please change getModelConfig() to add new models | 202 | + // Please change getKwsModelConfig() to add new models |
| 192 | // See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html | 203 | // See https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html |
| 193 | // for a list of available models | 204 | // for a list of available models |
| 194 | val type = 0 | 205 | val type = 0 |
| 195 | - Log.i(TAG, "Select model type ${type}") | 206 | + Log.i(TAG, "Select model type $type") |
| 196 | val config = KeywordSpotterConfig( | 207 | val config = KeywordSpotterConfig( |
| 197 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), | 208 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), |
| 198 | - modelConfig = getModelConfig(type = type)!!, | ||
| 199 | - keywordsFile = getKeywordsFile(type = type)!!, | 209 | + modelConfig = getKwsModelConfig(type = type)!!, |
| 210 | + keywordsFile = getKeywordsFile(type = type), | ||
| 200 | ) | 211 | ) |
| 201 | 212 | ||
| 202 | - model = SherpaOnnxKws( | 213 | + kws = KeywordSpotter( |
| 203 | assetManager = application.assets, | 214 | assetManager = application.assets, |
| 204 | config = config, | 215 | config = config, |
| 205 | ) | 216 | ) |
| 217 | + stream = kws.createStream() | ||
| 206 | } | 218 | } |
| 207 | } | 219 | } |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineRecognizer.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt |
| 1 | -// Copyright (c) 2023 Xiaomi Corporation | ||
| 2 | -package com.k2fsa.sherpa.onnx | ||
| 3 | - | ||
| 4 | -import android.content.res.AssetManager | ||
| 5 | - | ||
| 6 | -class WaveReader { | ||
| 7 | - companion object { | ||
| 8 | - // Read a mono wave file asset | ||
| 9 | - // The returned array has two entries: | ||
| 10 | - // - the first entry contains an 1-D float array | ||
| 11 | - // - the second entry is the sample rate | ||
| 12 | - external fun readWaveFromAsset( | ||
| 13 | - assetManager: AssetManager, | ||
| 14 | - filename: String, | ||
| 15 | - ): Array<Any> | ||
| 16 | - | ||
| 17 | - // Read a mono wave file from disk | ||
| 18 | - // The returned array has two entries: | ||
| 19 | - // - the first entry contains an 1-D float array | ||
| 20 | - // - the second entry is the sample rate | ||
| 21 | - external fun readWaveFromFile( | ||
| 22 | - filename: String, | ||
| 23 | - ): Array<Any> | ||
| 24 | - | ||
| 25 | - init { | ||
| 26 | - System.loadLibrary("sherpa-onnx-jni") | ||
| 27 | - } | ||
| 28 | - } | ||
| 29 | -} |
| 1 | <resources> | 1 | <resources> |
| 2 | - <string name="app_name">KWS with Next-gen Kaldi</string> | 2 | + <string name="app_name">Keyword spotting</string> |
| 3 | <string name="hint">Click the Start button to play keyword spotting with Next-gen Kaldi. | 3 | <string name="hint">Click the Start button to play keyword spotting with Next-gen Kaldi. |
| 4 | \n | 4 | \n |
| 5 | \n\n\n | 5 | \n\n\n |
| 6 | The source code and pre-trained models are publicly available. | 6 | The source code and pre-trained models are publicly available. |
| 7 | Please see https://github.com/k2-fsa/sherpa-onnx for details. | 7 | Please see https://github.com/k2-fsa/sherpa-onnx for details. |
| 8 | </string> | 8 | </string> |
| 9 | - <string name="keyword_hint">Input your keywords here, one keyword perline.</string> | 9 | + <string name="keyword_hint">Input your keywords here, one keyword per line.\nTwo example keywords are given below:\n\nn ǐ h ǎo @你好\nd àn g ē d àn g ē @蛋哥蛋哥</string> |
| 10 | <string name="start">Start</string> | 10 | <string name="start">Start</string> |
| 11 | <string name="stop">Stop</string> | 11 | <string name="stop">Stop</string> |
| 12 | </resources> | 12 | </resources> |
| @@ -2,7 +2,7 @@ package com.k2fsa.sherpa.onnx.speaker.identification | @@ -2,7 +2,7 @@ package com.k2fsa.sherpa.onnx.speaker.identification | ||
| 2 | 2 | ||
| 3 | import androidx.compose.ui.graphics.vector.ImageVector | 3 | import androidx.compose.ui.graphics.vector.ImageVector |
| 4 | 4 | ||
| 5 | -data class BarItem ( | 5 | +data class BarItem( |
| 6 | val title: String, | 6 | val title: String, |
| 7 | 7 | ||
| 8 | // see https://www.composables.com/icons | 8 | // see https://www.composables.com/icons |
| 1 | package com.k2fsa.sherpa.onnx.speaker.identification | 1 | package com.k2fsa.sherpa.onnx.speaker.identification |
| 2 | 2 | ||
| 3 | sealed class NavRoutes(val route: String) { | 3 | sealed class NavRoutes(val route: String) { |
| 4 | - object Home: NavRoutes("home") | ||
| 5 | - object Register: NavRoutes("register") | ||
| 6 | - object View: NavRoutes("view") | ||
| 7 | - object Help: NavRoutes("help") | 4 | + object Home : NavRoutes("home") |
| 5 | + object Register : NavRoutes("register") | ||
| 6 | + object View : NavRoutes("view") | ||
| 7 | + object Help : NavRoutes("help") | ||
| 8 | } | 8 | } |
| 1 | +../../../../../../../../../../../../sherpa-onnx/kotlin-api/OnlineStream.kt |
| 1 | +../../../../../../../../../../../../sherpa-onnx/kotlin-api/Speaker.kt |
| 1 | -@file:OptIn(ExperimentalMaterial3Api::class, ExperimentalFoundationApi::class) | 1 | +@file:OptIn(ExperimentalMaterial3Api::class) |
| 2 | 2 | ||
| 3 | package com.k2fsa.sherpa.onnx.slid | 3 | package com.k2fsa.sherpa.onnx.slid |
| 4 | 4 | ||
| @@ -9,11 +9,9 @@ import android.media.AudioFormat | @@ -9,11 +9,9 @@ import android.media.AudioFormat | ||
| 9 | import android.media.AudioRecord | 9 | import android.media.AudioRecord |
| 10 | import android.media.MediaRecorder | 10 | import android.media.MediaRecorder |
| 11 | import android.util.Log | 11 | import android.util.Log |
| 12 | -import androidx.compose.foundation.ExperimentalFoundationApi | ||
| 13 | import androidx.compose.foundation.layout.Box | 12 | import androidx.compose.foundation.layout.Box |
| 14 | import androidx.compose.foundation.layout.Column | 13 | import androidx.compose.foundation.layout.Column |
| 15 | import androidx.compose.foundation.layout.PaddingValues | 14 | import androidx.compose.foundation.layout.PaddingValues |
| 16 | -import androidx.compose.ui.Modifier | ||
| 17 | import androidx.compose.foundation.layout.Spacer | 15 | import androidx.compose.foundation.layout.Spacer |
| 18 | import androidx.compose.foundation.layout.fillMaxSize | 16 | import androidx.compose.foundation.layout.fillMaxSize |
| 19 | import androidx.compose.foundation.layout.height | 17 | import androidx.compose.foundation.layout.height |
| @@ -31,6 +29,7 @@ import androidx.compose.runtime.mutableStateOf | @@ -31,6 +29,7 @@ import androidx.compose.runtime.mutableStateOf | ||
| 31 | import androidx.compose.runtime.remember | 29 | import androidx.compose.runtime.remember |
| 32 | import androidx.compose.runtime.setValue | 30 | import androidx.compose.runtime.setValue |
| 33 | import androidx.compose.ui.Alignment | 31 | import androidx.compose.ui.Alignment |
| 32 | +import androidx.compose.ui.Modifier | ||
| 34 | import androidx.compose.ui.platform.LocalContext | 33 | import androidx.compose.ui.platform.LocalContext |
| 35 | import androidx.compose.ui.text.font.FontWeight | 34 | import androidx.compose.ui.text.font.FontWeight |
| 36 | import androidx.compose.ui.unit.dp | 35 | import androidx.compose.ui.unit.dp |
| @@ -63,13 +62,13 @@ fun Home() { | @@ -63,13 +62,13 @@ fun Home() { | ||
| 63 | } | 62 | } |
| 64 | 63 | ||
| 65 | private var audioRecord: AudioRecord? = null | 64 | private var audioRecord: AudioRecord? = null |
| 66 | -private val sampleRateInHz = 16000 | 65 | +private const val sampleRateInHz = 16000 |
| 67 | 66 | ||
| 68 | @Composable | 67 | @Composable |
| 69 | fun MyApp(padding: PaddingValues) { | 68 | fun MyApp(padding: PaddingValues) { |
| 70 | val activity = LocalContext.current as Activity | 69 | val activity = LocalContext.current as Activity |
| 71 | var isStarted by remember { mutableStateOf(false) } | 70 | var isStarted by remember { mutableStateOf(false) } |
| 72 | - var result by remember { mutableStateOf<String>("") } | 71 | + var result by remember { mutableStateOf("") } |
| 73 | 72 | ||
| 74 | val onButtonClick: () -> Unit = { | 73 | val onButtonClick: () -> Unit = { |
| 75 | isStarted = !isStarted | 74 | isStarted = !isStarted |
| @@ -114,12 +113,12 @@ fun MyApp(padding: PaddingValues) { | @@ -114,12 +113,12 @@ fun MyApp(padding: PaddingValues) { | ||
| 114 | } | 113 | } |
| 115 | Log.i(TAG, "Stop recording") | 114 | Log.i(TAG, "Stop recording") |
| 116 | Log.i(TAG, "Start recognition") | 115 | Log.i(TAG, "Start recognition") |
| 117 | - val samples = Flatten(sampleList) | 116 | + val samples = flatten(sampleList) |
| 118 | val stream = Slid.slid.createStream() | 117 | val stream = Slid.slid.createStream() |
| 119 | stream.acceptWaveform(samples, sampleRateInHz) | 118 | stream.acceptWaveform(samples, sampleRateInHz) |
| 120 | val lang = Slid.slid.compute(stream) | 119 | val lang = Slid.slid.compute(stream) |
| 121 | 120 | ||
| 122 | - result = Slid.localeMap.get(lang) ?: lang | 121 | + result = Slid.localeMap[lang] ?: lang |
| 123 | 122 | ||
| 124 | stream.release() | 123 | stream.release() |
| 125 | } | 124 | } |
| @@ -152,7 +151,7 @@ fun MyApp(padding: PaddingValues) { | @@ -152,7 +151,7 @@ fun MyApp(padding: PaddingValues) { | ||
| 152 | } | 151 | } |
| 153 | } | 152 | } |
| 154 | 153 | ||
| 155 | -fun Flatten(sampleList: ArrayList<FloatArray>): FloatArray { | 154 | +fun flatten(sampleList: ArrayList<FloatArray>): FloatArray { |
| 156 | var totalSamples = 0 | 155 | var totalSamples = 0 |
| 157 | for (a in sampleList) { | 156 | for (a in sampleList) { |
| 158 | totalSamples += a.size | 157 | totalSamples += a.size |
| @@ -10,12 +10,9 @@ import androidx.activity.compose.setContent | @@ -10,12 +10,9 @@ import androidx.activity.compose.setContent | ||
| 10 | import androidx.compose.foundation.layout.fillMaxSize | 10 | import androidx.compose.foundation.layout.fillMaxSize |
| 11 | import androidx.compose.material3.MaterialTheme | 11 | import androidx.compose.material3.MaterialTheme |
| 12 | import androidx.compose.material3.Surface | 12 | import androidx.compose.material3.Surface |
| 13 | -import androidx.compose.material3.Text | ||
| 14 | import androidx.compose.runtime.Composable | 13 | import androidx.compose.runtime.Composable |
| 15 | import androidx.compose.ui.Modifier | 14 | import androidx.compose.ui.Modifier |
| 16 | -import androidx.compose.ui.tooling.preview.Preview | ||
| 17 | import androidx.core.app.ActivityCompat | 15 | import androidx.core.app.ActivityCompat |
| 18 | -import com.k2fsa.sherpa.onnx.SpokenLanguageIdentification | ||
| 19 | import com.k2fsa.sherpa.onnx.slid.ui.theme.SherpaOnnxSpokenLanguageIdentificationTheme | 16 | import com.k2fsa.sherpa.onnx.slid.ui.theme.SherpaOnnxSpokenLanguageIdentificationTheme |
| 20 | 17 | ||
| 21 | const val TAG = "sherpa-onnx" | 18 | const val TAG = "sherpa-onnx" |
| @@ -32,6 +29,7 @@ class MainActivity : ComponentActivity() { | @@ -32,6 +29,7 @@ class MainActivity : ComponentActivity() { | ||
| 32 | ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION) | 29 | ActivityCompat.requestPermissions(this, permissions, REQUEST_RECORD_AUDIO_PERMISSION) |
| 33 | Slid.initSlid(this.assets) | 30 | Slid.initSlid(this.assets) |
| 34 | } | 31 | } |
| 32 | + | ||
| 35 | @Suppress("DEPRECATION") | 33 | @Suppress("DEPRECATION") |
| 36 | @Deprecated("Deprecated in Java") | 34 | @Deprecated("Deprecated in Java") |
| 37 | override fun onRequestPermissionsResult( | 35 | override fun onRequestPermissionsResult( |
| 1 | +../../../../../../../../../../../sherpa-onnx/kotlin-api/SpokenLanguageIdentification.kt |
| @@ -15,7 +15,7 @@ object Slid { | @@ -15,7 +15,7 @@ object Slid { | ||
| 15 | get() { | 15 | get() { |
| 16 | return _slid!! | 16 | return _slid!! |
| 17 | } | 17 | } |
| 18 | - val localeMap : Map<String, String> | 18 | + val localeMap: Map<String, String> |
| 19 | get() { | 19 | get() { |
| 20 | return _localeMap | 20 | return _localeMap |
| 21 | } | 21 | } |
| @@ -31,7 +31,7 @@ object Slid { | @@ -31,7 +31,7 @@ object Slid { | ||
| 31 | } | 31 | } |
| 32 | 32 | ||
| 33 | if (_localeMap.isEmpty()) { | 33 | if (_localeMap.isEmpty()) { |
| 34 | - val allLang = Locale.getISOLanguages(); | 34 | + val allLang = Locale.getISOLanguages() |
| 35 | for (lang in allLang) { | 35 | for (lang in allLang) { |
| 36 | val locale = Locale(lang) | 36 | val locale = Locale(lang) |
| 37 | _localeMap[lang] = locale.displayName | 37 | _localeMap[lang] = locale.displayName |
| 1 | package com.k2fsa.sherpa.onnx | 1 | package com.k2fsa.sherpa.onnx |
| 2 | 2 | ||
| 3 | import android.content.res.AssetManager | 3 | import android.content.res.AssetManager |
| 4 | -import android.media.* | 4 | +import android.media.AudioAttributes |
| 5 | +import android.media.AudioFormat | ||
| 6 | +import android.media.AudioManager | ||
| 7 | +import android.media.AudioTrack | ||
| 8 | +import android.media.MediaPlayer | ||
| 5 | import android.net.Uri | 9 | import android.net.Uri |
| 6 | import android.os.Bundle | 10 | import android.os.Bundle |
| 7 | import android.util.Log | 11 | import android.util.Log |
| @@ -212,7 +216,7 @@ class MainActivity : AppCompatActivity() { | @@ -212,7 +216,7 @@ class MainActivity : AppCompatActivity() { | ||
| 212 | } | 216 | } |
| 213 | 217 | ||
| 214 | if (dictDir != null) { | 218 | if (dictDir != null) { |
| 215 | - val newDir = copyDataDir( modelDir!!) | 219 | + val newDir = copyDataDir(modelDir!!) |
| 216 | modelDir = newDir + "/" + modelDir | 220 | modelDir = newDir + "/" + modelDir |
| 217 | dictDir = modelDir + "/" + "dict" | 221 | dictDir = modelDir + "/" + "dict" |
| 218 | ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | 222 | ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" |
| @@ -220,7 +224,9 @@ class MainActivity : AppCompatActivity() { | @@ -220,7 +224,9 @@ class MainActivity : AppCompatActivity() { | ||
| 220 | } | 224 | } |
| 221 | 225 | ||
| 222 | val config = getOfflineTtsConfig( | 226 | val config = getOfflineTtsConfig( |
| 223 | - modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "", | 227 | + modelDir = modelDir!!, |
| 228 | + modelName = modelName!!, | ||
| 229 | + lexicon = lexicon ?: "", | ||
| 224 | dataDir = dataDir ?: "", | 230 | dataDir = dataDir ?: "", |
| 225 | dictDir = dictDir ?: "", | 231 | dictDir = dictDir ?: "", |
| 226 | ruleFsts = ruleFsts ?: "", | 232 | ruleFsts = ruleFsts ?: "", |
| @@ -232,11 +238,11 @@ class MainActivity : AppCompatActivity() { | @@ -232,11 +238,11 @@ class MainActivity : AppCompatActivity() { | ||
| 232 | 238 | ||
| 233 | 239 | ||
| 234 | private fun copyDataDir(dataDir: String): String { | 240 | private fun copyDataDir(dataDir: String): String { |
| 235 | - println("data dir is $dataDir") | 241 | + Log.i(TAG, "data dir is $dataDir") |
| 236 | copyAssets(dataDir) | 242 | copyAssets(dataDir) |
| 237 | 243 | ||
| 238 | val newDataDir = application.getExternalFilesDir(null)!!.absolutePath | 244 | val newDataDir = application.getExternalFilesDir(null)!!.absolutePath |
| 239 | - println("newDataDir: $newDataDir") | 245 | + Log.i(TAG, "newDataDir: $newDataDir") |
| 240 | return newDataDir | 246 | return newDataDir |
| 241 | } | 247 | } |
| 242 | 248 | ||
| @@ -256,7 +262,7 @@ class MainActivity : AppCompatActivity() { | @@ -256,7 +262,7 @@ class MainActivity : AppCompatActivity() { | ||
| 256 | } | 262 | } |
| 257 | } | 263 | } |
| 258 | } catch (ex: IOException) { | 264 | } catch (ex: IOException) { |
| 259 | - Log.e(TAG, "Failed to copy $path. ${ex.toString()}") | 265 | + Log.e(TAG, "Failed to copy $path. $ex") |
| 260 | } | 266 | } |
| 261 | } | 267 | } |
| 262 | 268 | ||
| @@ -276,7 +282,7 @@ class MainActivity : AppCompatActivity() { | @@ -276,7 +282,7 @@ class MainActivity : AppCompatActivity() { | ||
| 276 | ostream.flush() | 282 | ostream.flush() |
| 277 | ostream.close() | 283 | ostream.close() |
| 278 | } catch (ex: Exception) { | 284 | } catch (ex: Exception) { |
| 279 | - Log.e(TAG, "Failed to copy $filename, ${ex.toString()}") | 285 | + Log.e(TAG, "Failed to copy $filename, $ex") |
| 280 | } | 286 | } |
| 281 | } | 287 | } |
| 282 | } | 288 | } |
| @@ -49,10 +49,10 @@ class OfflineTts( | @@ -49,10 +49,10 @@ class OfflineTts( | ||
| 49 | private var ptr: Long | 49 | private var ptr: Long |
| 50 | 50 | ||
| 51 | init { | 51 | init { |
| 52 | - if (assetManager != null) { | ||
| 53 | - ptr = newFromAsset(assetManager, config) | 52 | + ptr = if (assetManager != null) { |
| 53 | + newFromAsset(assetManager, config) | ||
| 54 | } else { | 54 | } else { |
| 55 | - ptr = newFromFile(config) | 55 | + newFromFile(config) |
| 56 | } | 56 | } |
| 57 | } | 57 | } |
| 58 | 58 | ||
| @@ -65,7 +65,7 @@ class OfflineTts( | @@ -65,7 +65,7 @@ class OfflineTts( | ||
| 65 | sid: Int = 0, | 65 | sid: Int = 0, |
| 66 | speed: Float = 1.0f | 66 | speed: Float = 1.0f |
| 67 | ): GeneratedAudio { | 67 | ): GeneratedAudio { |
| 68 | - var objArray = generateImpl(ptr, text = text, sid = sid, speed = speed) | 68 | + val objArray = generateImpl(ptr, text = text, sid = sid, speed = speed) |
| 69 | return GeneratedAudio( | 69 | return GeneratedAudio( |
| 70 | samples = objArray[0] as FloatArray, | 70 | samples = objArray[0] as FloatArray, |
| 71 | sampleRate = objArray[1] as Int | 71 | sampleRate = objArray[1] as Int |
| @@ -78,7 +78,13 @@ class OfflineTts( | @@ -78,7 +78,13 @@ class OfflineTts( | ||
| 78 | speed: Float = 1.0f, | 78 | speed: Float = 1.0f, |
| 79 | callback: (samples: FloatArray) -> Unit | 79 | callback: (samples: FloatArray) -> Unit |
| 80 | ): GeneratedAudio { | 80 | ): GeneratedAudio { |
| 81 | - var objArray = generateWithCallbackImpl(ptr, text = text, sid = sid, speed = speed, callback=callback) | 81 | + val objArray = generateWithCallbackImpl( |
| 82 | + ptr, | ||
| 83 | + text = text, | ||
| 84 | + sid = sid, | ||
| 85 | + speed = speed, | ||
| 86 | + callback = callback | ||
| 87 | + ) | ||
| 82 | return GeneratedAudio( | 88 | return GeneratedAudio( |
| 83 | samples = objArray[0] as FloatArray, | 89 | samples = objArray[0] as FloatArray, |
| 84 | sampleRate = objArray[1] as Int | 90 | sampleRate = objArray[1] as Int |
| @@ -87,10 +93,10 @@ class OfflineTts( | @@ -87,10 +93,10 @@ class OfflineTts( | ||
| 87 | 93 | ||
| 88 | fun allocate(assetManager: AssetManager? = null) { | 94 | fun allocate(assetManager: AssetManager? = null) { |
| 89 | if (ptr == 0L) { | 95 | if (ptr == 0L) { |
| 90 | - if (assetManager != null) { | ||
| 91 | - ptr = newFromAsset(assetManager, config) | 96 | + ptr = if (assetManager != null) { |
| 97 | + newFromAsset(assetManager, config) | ||
| 92 | } else { | 98 | } else { |
| 93 | - ptr = newFromFile(config) | 99 | + newFromFile(config) |
| 94 | } | 100 | } |
| 95 | } | 101 | } |
| 96 | } | 102 | } |
| @@ -103,8 +109,13 @@ class OfflineTts( | @@ -103,8 +109,13 @@ class OfflineTts( | ||
| 103 | } | 109 | } |
| 104 | 110 | ||
| 105 | protected fun finalize() { | 111 | protected fun finalize() { |
| 112 | + if (ptr != 0L) { | ||
| 106 | delete(ptr) | 113 | delete(ptr) |
| 114 | + ptr = 0 | ||
| 107 | } | 115 | } |
| 116 | + } | ||
| 117 | + | ||
| 118 | + fun release() = finalize() | ||
| 108 | 119 | ||
| 109 | private external fun newFromAsset( | 120 | private external fun newFromAsset( |
| 110 | assetManager: AssetManager, | 121 | assetManager: AssetManager, |
| @@ -123,14 +134,14 @@ class OfflineTts( | @@ -123,14 +134,14 @@ class OfflineTts( | ||
| 123 | // - the first entry is an 1-D float array containing audio samples. | 134 | // - the first entry is an 1-D float array containing audio samples. |
| 124 | // Each sample is normalized to the range [-1, 1] | 135 | // Each sample is normalized to the range [-1, 1] |
| 125 | // - the second entry is the sample rate | 136 | // - the second entry is the sample rate |
| 126 | - external fun generateImpl( | 137 | + private external fun generateImpl( |
| 127 | ptr: Long, | 138 | ptr: Long, |
| 128 | text: String, | 139 | text: String, |
| 129 | sid: Int = 0, | 140 | sid: Int = 0, |
| 130 | speed: Float = 1.0f | 141 | speed: Float = 1.0f |
| 131 | ): Array<Any> | 142 | ): Array<Any> |
| 132 | 143 | ||
| 133 | - external fun generateWithCallbackImpl( | 144 | + private external fun generateWithCallbackImpl( |
| 134 | ptr: Long, | 145 | ptr: Long, |
| 135 | text: String, | 146 | text: String, |
| 136 | sid: Int = 0, | 147 | sid: Int = 0, |
| @@ -156,7 +167,7 @@ fun getOfflineTtsConfig( | @@ -156,7 +167,7 @@ fun getOfflineTtsConfig( | ||
| 156 | dictDir: String, | 167 | dictDir: String, |
| 157 | ruleFsts: String, | 168 | ruleFsts: String, |
| 158 | ruleFars: String | 169 | ruleFars: String |
| 159 | -): OfflineTtsConfig? { | 170 | +): OfflineTtsConfig { |
| 160 | return OfflineTtsConfig( | 171 | return OfflineTtsConfig( |
| 161 | model = OfflineTtsModelConfig( | 172 | model = OfflineTtsModelConfig( |
| 162 | vits = OfflineTtsVitsModelConfig( | 173 | vits = OfflineTtsVitsModelConfig( |
| 1 | package com.k2fsa.sherpa.onnx.tts.engine | 1 | package com.k2fsa.sherpa.onnx.tts.engine |
| 2 | 2 | ||
| 3 | import android.content.Intent | 3 | import android.content.Intent |
| 4 | -import androidx.appcompat.app.AppCompatActivity | ||
| 5 | import android.os.Bundle | 4 | import android.os.Bundle |
| 6 | import android.speech.tts.TextToSpeech | 5 | import android.speech.tts.TextToSpeech |
| 6 | +import androidx.appcompat.app.AppCompatActivity | ||
| 7 | 7 | ||
| 8 | class CheckVoiceData : AppCompatActivity() { | 8 | class CheckVoiceData : AppCompatActivity() { |
| 9 | override fun onCreate(savedInstanceState: Bundle?) { | 9 | override fun onCreate(savedInstanceState: Bundle?) { |
| 10 | super.onCreate(savedInstanceState) | 10 | super.onCreate(savedInstanceState) |
| 11 | val intent = Intent().apply { | 11 | val intent = Intent().apply { |
| 12 | - putStringArrayListExtra(TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES, arrayListOf(TtsEngine.lang)) | 12 | + putStringArrayListExtra( |
| 13 | + TextToSpeech.Engine.EXTRA_AVAILABLE_VOICES, | ||
| 14 | + arrayListOf(TtsEngine.lang) | ||
| 15 | + ) | ||
| 13 | putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, arrayListOf()) | 16 | putStringArrayListExtra(TextToSpeech.Engine.EXTRA_UNAVAILABLE_VOICES, arrayListOf()) |
| 14 | } | 17 | } |
| 15 | setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, intent) | 18 | setResult(TextToSpeech.Engine.CHECK_VOICE_DATA_PASS, intent) |
| @@ -2,7 +2,6 @@ package com.k2fsa.sherpa.onnx.tts.engine | @@ -2,7 +2,6 @@ package com.k2fsa.sherpa.onnx.tts.engine | ||
| 2 | 2 | ||
| 3 | import android.app.Activity | 3 | import android.app.Activity |
| 4 | import android.content.Intent | 4 | import android.content.Intent |
| 5 | -import androidx.appcompat.app.AppCompatActivity | ||
| 6 | import android.os.Bundle | 5 | import android.os.Bundle |
| 7 | import android.speech.tts.TextToSpeech | 6 | import android.speech.tts.TextToSpeech |
| 8 | 7 | ||
| @@ -12,120 +11,168 @@ fun getSampleText(lang: String): String { | @@ -12,120 +11,168 @@ fun getSampleText(lang: String): String { | ||
| 12 | "ara" -> { | 11 | "ara" -> { |
| 13 | text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي" | 12 | text = "هذا هو محرك تحويل النص إلى كلام باستخدام الجيل القادم من كالدي" |
| 14 | } | 13 | } |
| 14 | + | ||
| 15 | "ben" -> { | 15 | "ben" -> { |
| 16 | text = "এটি একটি টেক্সট-টু-স্পীচ ইঞ্জিন যা পরবর্তী প্রজন্মের কালডি ব্যবহার করে" | 16 | text = "এটি একটি টেক্সট-টু-স্পীচ ইঞ্জিন যা পরবর্তী প্রজন্মের কালডি ব্যবহার করে" |
| 17 | } | 17 | } |
| 18 | + | ||
| 18 | "bul" -> { | 19 | "bul" -> { |
| 19 | - text = "Това е машина за преобразуване на текст в реч, използваща Kaldi от следващо поколение" | 20 | + text = |
| 21 | + "Това е машина за преобразуване на текст в реч, използваща Kaldi от следващо поколение" | ||
| 20 | } | 22 | } |
| 23 | + | ||
| 21 | "cat" -> { | 24 | "cat" -> { |
| 22 | text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació" | 25 | text = "Aquest és un motor de text a veu que utilitza Kaldi de nova generació" |
| 23 | } | 26 | } |
| 27 | + | ||
| 24 | "ces" -> { | 28 | "ces" -> { |
| 25 | text = "Toto je převodník textu na řeč využívající novou generaci kaldi" | 29 | text = "Toto je převodník textu na řeč využívající novou generaci kaldi" |
| 26 | } | 30 | } |
| 31 | + | ||
| 27 | "dan" -> { | 32 | "dan" -> { |
| 28 | text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi" | 33 | text = "Dette er en tekst til tale-motor, der bruger næste generation af kaldi" |
| 29 | } | 34 | } |
| 35 | + | ||
| 30 | "deu" -> { | 36 | "deu" -> { |
| 31 | - text = "Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet" | 37 | + text = |
| 38 | + "Dies ist eine Text-to-Speech-Engine, die Kaldi der nächsten Generation verwendet" | ||
| 32 | } | 39 | } |
| 40 | + | ||
| 33 | "ell" -> { | 41 | "ell" -> { |
| 34 | text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς" | 42 | text = "Αυτή είναι μια μηχανή κειμένου σε ομιλία που χρησιμοποιεί kaldi επόμενης γενιάς" |
| 35 | } | 43 | } |
| 44 | + | ||
| 36 | "eng" -> { | 45 | "eng" -> { |
| 37 | text = "This is a text-to-speech engine using next generation Kaldi" | 46 | text = "This is a text-to-speech engine using next generation Kaldi" |
| 38 | } | 47 | } |
| 48 | + | ||
| 39 | "est" -> { | 49 | "est" -> { |
| 40 | text = "See on teksti kõneks muutmise mootor, mis kasutab järgmise põlvkonna Kaldi" | 50 | text = "See on teksti kõneks muutmise mootor, mis kasutab järgmise põlvkonna Kaldi" |
| 41 | } | 51 | } |
| 52 | + | ||
| 42 | "fin" -> { | 53 | "fin" -> { |
| 43 | text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia" | 54 | text = "Tämä on tekstistä puheeksi -moottori, joka käyttää seuraavan sukupolven kaldia" |
| 44 | } | 55 | } |
| 56 | + | ||
| 45 | "fra" -> { | 57 | "fra" -> { |
| 46 | text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération" | 58 | text = "Il s'agit d'un moteur de synthèse vocale utilisant Kaldi de nouvelle génération" |
| 47 | } | 59 | } |
| 60 | + | ||
| 48 | "gle" -> { | 61 | "gle" -> { |
| 49 | text = "Is inneall téacs-go-hurlabhra é seo a úsáideann Kaldi den chéad ghlúin eile" | 62 | text = "Is inneall téacs-go-hurlabhra é seo a úsáideann Kaldi den chéad ghlúin eile" |
| 50 | } | 63 | } |
| 64 | + | ||
| 51 | "hrv" -> { | 65 | "hrv" -> { |
| 52 | - text = "Ovo je mehanizam za pretvaranje teksta u govor koji koristi Kaldi sljedeće generacije" | 66 | + text = |
| 67 | + "Ovo je mehanizam za pretvaranje teksta u govor koji koristi Kaldi sljedeće generacije" | ||
| 53 | } | 68 | } |
| 69 | + | ||
| 54 | "hun" -> { | 70 | "hun" -> { |
| 55 | text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával" | 71 | text = "Ez egy szövegfelolvasó motor a következő generációs kaldi használatával" |
| 56 | } | 72 | } |
| 73 | + | ||
| 57 | "isl" -> { | 74 | "isl" -> { |
| 58 | text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi" | 75 | text = "Þetta er texta í tal vél sem notar næstu kynslóð kaldi" |
| 59 | } | 76 | } |
| 77 | + | ||
| 60 | "ita" -> { | 78 | "ita" -> { |
| 61 | text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione" | 79 | text = "Questo è un motore di sintesi vocale che utilizza kaldi di nuova generazione" |
| 62 | } | 80 | } |
| 81 | + | ||
| 63 | "kat" -> { | 82 | "kat" -> { |
| 64 | text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით" | 83 | text = "ეს არის ტექსტიდან მეტყველების ძრავა შემდეგი თაობის კალდის გამოყენებით" |
| 65 | } | 84 | } |
| 85 | + | ||
| 66 | "kaz" -> { | 86 | "kaz" -> { |
| 67 | text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш" | 87 | text = "Бұл келесі буын kaldi көмегімен мәтіннен сөйлеуге арналған қозғалтқыш" |
| 68 | } | 88 | } |
| 89 | + | ||
| 69 | "mlt" -> { | 90 | "mlt" -> { |
| 70 | text = "Din hija magna text-to-speech li tuża Kaldi tal-ġenerazzjoni li jmiss" | 91 | text = "Din hija magna text-to-speech li tuża Kaldi tal-ġenerazzjoni li jmiss" |
| 71 | } | 92 | } |
| 93 | + | ||
| 72 | "lav" -> { | 94 | "lav" -> { |
| 73 | text = "Šis ir teksta pārvēršanas runā dzinējs, kas izmanto nākamās paaudzes Kaldi" | 95 | text = "Šis ir teksta pārvēršanas runā dzinējs, kas izmanto nākamās paaudzes Kaldi" |
| 74 | } | 96 | } |
| 97 | + | ||
| 75 | "lit" -> { | 98 | "lit" -> { |
| 76 | text = "Tai teksto į kalbą variklis, kuriame naudojamas naujos kartos Kaldi" | 99 | text = "Tai teksto į kalbą variklis, kuriame naudojamas naujos kartos Kaldi" |
| 77 | } | 100 | } |
| 101 | + | ||
| 78 | "ltz" -> { | 102 | "ltz" -> { |
| 79 | text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi" | 103 | text = "Dëst ass en Text-zu-Speech-Motor mat der nächster Generatioun Kaldi" |
| 80 | } | 104 | } |
| 105 | + | ||
| 81 | "nep" -> { | 106 | "nep" -> { |
| 82 | text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो" | 107 | text = "यो अर्को पुस्ता काल्डी प्रयोग गरेर स्पीच इन्जिनको पाठ हो" |
| 83 | } | 108 | } |
| 109 | + | ||
| 84 | "nld" -> { | 110 | "nld" -> { |
| 85 | - text = "Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie" | 111 | + text = |
| 112 | + "Dit is een tekst-naar-spraak-engine die gebruik maakt van Kaldi van de volgende generatie" | ||
| 86 | } | 113 | } |
| 114 | + | ||
| 87 | "nor" -> { | 115 | "nor" -> { |
| 88 | text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi" | 116 | text = "Dette er en tekst til tale-motor som bruker neste generasjons kaldi" |
| 89 | } | 117 | } |
| 118 | + | ||
| 90 | "pol" -> { | 119 | "pol" -> { |
| 91 | text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji" | 120 | text = "Jest to silnik syntezatora mowy wykorzystujący Kaldi nowej generacji" |
| 92 | } | 121 | } |
| 122 | + | ||
| 93 | "por" -> { | 123 | "por" -> { |
| 94 | - text = "Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração" | 124 | + text = |
| 125 | + "Este é um mecanismo de conversão de texto em fala usando Kaldi de próxima geração" | ||
| 95 | } | 126 | } |
| 127 | + | ||
| 96 | "ron" -> { | 128 | "ron" -> { |
| 97 | text = "Acesta este un motor text to speech care folosește generația următoare de kadi" | 129 | text = "Acesta este un motor text to speech care folosește generația următoare de kadi" |
| 98 | } | 130 | } |
| 131 | + | ||
| 99 | "rus" -> { | 132 | "rus" -> { |
| 100 | - text = "Это движок преобразования текста в речь, использующий Kaldi следующего поколения." | 133 | + text = |
| 134 | + "Это движок преобразования текста в речь, использующий Kaldi следующего поколения." | ||
| 101 | } | 135 | } |
| 136 | + | ||
| 102 | "slk" -> { | 137 | "slk" -> { |
| 103 | text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie" | 138 | text = "Toto je nástroj na prevod textu na reč využívajúci kaldi novej generácie" |
| 104 | } | 139 | } |
| 140 | + | ||
| 105 | "slv" -> { | 141 | "slv" -> { |
| 106 | - text = "To je mehanizem za pretvorbo besedila v govor, ki uporablja Kaldi naslednje generacije" | 142 | + text = |
| 143 | + "To je mehanizem za pretvorbo besedila v govor, ki uporablja Kaldi naslednje generacije" | ||
| 107 | } | 144 | } |
| 145 | + | ||
| 108 | "spa" -> { | 146 | "spa" -> { |
| 109 | text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación." | 147 | text = "Este es un motor de texto a voz que utiliza kaldi de próxima generación." |
| 110 | } | 148 | } |
| 149 | + | ||
| 111 | "srp" -> { | 150 | "srp" -> { |
| 112 | - text = "Ово је механизам за претварање текста у говор који користи калди следеће генерације" | 151 | + text = |
| 152 | + "Ово је механизам за претварање текста у говор који користи калди следеће генерације" | ||
| 113 | } | 153 | } |
| 154 | + | ||
| 114 | "swa" -> { | 155 | "swa" -> { |
| 115 | text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi" | 156 | text = "Haya ni maandishi kwa injini ya hotuba kwa kutumia kizazi kijacho kaldi" |
| 116 | } | 157 | } |
| 158 | + | ||
| 117 | "swe" -> { | 159 | "swe" -> { |
| 118 | text = "Detta är en text till tal-motor som använder nästa generations kaldi" | 160 | text = "Detta är en text till tal-motor som använder nästa generations kaldi" |
| 119 | } | 161 | } |
| 162 | + | ||
| 120 | "tur" -> { | 163 | "tur" -> { |
| 121 | text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur" | 164 | text = "Bu, yeni nesil kaldi'yi kullanan bir metinden konuşmaya motorudur" |
| 122 | } | 165 | } |
| 166 | + | ||
| 123 | "ukr" -> { | 167 | "ukr" -> { |
| 124 | - text = "Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління" | 168 | + text = |
| 169 | + "Це механізм перетворення тексту на мовлення, який використовує kaldi нового покоління" | ||
| 125 | } | 170 | } |
| 171 | + | ||
| 126 | "vie" -> { | 172 | "vie" -> { |
| 127 | text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo" | 173 | text = "Đây là công cụ chuyển văn bản thành giọng nói sử dụng kaldi thế hệ tiếp theo" |
| 128 | } | 174 | } |
| 175 | + | ||
| 129 | "zho", "cmn" -> { | 176 | "zho", "cmn" -> { |
| 130 | text = "使用新一代卡尔迪的语音合成引擎" | 177 | text = "使用新一代卡尔迪的语音合成引擎" |
| 131 | } | 178 | } |
| @@ -137,13 +184,13 @@ class GetSampleText : Activity() { | @@ -137,13 +184,13 @@ class GetSampleText : Activity() { | ||
| 137 | override fun onCreate(savedInstanceState: Bundle?) { | 184 | override fun onCreate(savedInstanceState: Bundle?) { |
| 138 | super.onCreate(savedInstanceState) | 185 | super.onCreate(savedInstanceState) |
| 139 | var result = TextToSpeech.LANG_AVAILABLE | 186 | var result = TextToSpeech.LANG_AVAILABLE |
| 140 | - var text: String = getSampleText(TtsEngine.lang ?: "") | 187 | + val text: String = getSampleText(TtsEngine.lang ?: "") |
| 141 | if (text.isEmpty()) { | 188 | if (text.isEmpty()) { |
| 142 | result = TextToSpeech.LANG_NOT_SUPPORTED | 189 | result = TextToSpeech.LANG_NOT_SUPPORTED |
| 143 | } | 190 | } |
| 144 | 191 | ||
| 145 | - val intent = Intent().apply{ | ||
| 146 | - if(result == TextToSpeech.LANG_AVAILABLE) { | 192 | + val intent = Intent().apply { |
| 193 | + if (result == TextToSpeech.LANG_AVAILABLE) { | ||
| 147 | putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text) | 194 | putExtra(TextToSpeech.Engine.EXTRA_SAMPLE_TEXT, text) |
| 148 | } else { | 195 | } else { |
| 149 | putExtra("sampleText", text) | 196 | putExtra("sampleText", text) |
| @@ -26,20 +26,16 @@ import androidx.compose.material3.Scaffold | @@ -26,20 +26,16 @@ import androidx.compose.material3.Scaffold | ||
| 26 | import androidx.compose.material3.Slider | 26 | import androidx.compose.material3.Slider |
| 27 | import androidx.compose.material3.Surface | 27 | import androidx.compose.material3.Surface |
| 28 | import androidx.compose.material3.Text | 28 | import androidx.compose.material3.Text |
| 29 | -import androidx.compose.material3.TextField | ||
| 30 | import androidx.compose.material3.TopAppBar | 29 | import androidx.compose.material3.TopAppBar |
| 31 | -import androidx.compose.runtime.Composable | ||
| 32 | import androidx.compose.runtime.getValue | 30 | import androidx.compose.runtime.getValue |
| 33 | import androidx.compose.runtime.mutableStateOf | 31 | import androidx.compose.runtime.mutableStateOf |
| 34 | import androidx.compose.runtime.remember | 32 | import androidx.compose.runtime.remember |
| 35 | import androidx.compose.runtime.setValue | 33 | import androidx.compose.runtime.setValue |
| 36 | import androidx.compose.ui.Modifier | 34 | import androidx.compose.ui.Modifier |
| 37 | import androidx.compose.ui.text.input.KeyboardType | 35 | import androidx.compose.ui.text.input.KeyboardType |
| 38 | -import androidx.compose.ui.tooling.preview.Preview | ||
| 39 | import androidx.compose.ui.unit.dp | 36 | import androidx.compose.ui.unit.dp |
| 40 | import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme | 37 | import com.k2fsa.sherpa.onnx.tts.engine.ui.theme.SherpaOnnxTtsEngineTheme |
| 41 | import java.io.File | 38 | import java.io.File |
| 42 | -import java.lang.NumberFormatException | ||
| 43 | 39 | ||
| 44 | const val TAG = "sherpa-onnx-tts-engine" | 40 | const val TAG = "sherpa-onnx-tts-engine" |
| 45 | 41 | ||
| @@ -88,7 +84,7 @@ class MainActivity : ComponentActivity() { | @@ -88,7 +84,7 @@ class MainActivity : ComponentActivity() { | ||
| 88 | try { | 84 | try { |
| 89 | TtsEngine.speakerId = it.toString().toInt() | 85 | TtsEngine.speakerId = it.toString().toInt() |
| 90 | } catch (ex: NumberFormatException) { | 86 | } catch (ex: NumberFormatException) { |
| 91 | - Log.i(TAG, "Invalid input: ${it}") | 87 | + Log.i(TAG, "Invalid input: $it") |
| 92 | TtsEngine.speakerId = 0 | 88 | TtsEngine.speakerId = 0 |
| 93 | } | 89 | } |
| 94 | } | 90 | } |
| @@ -119,7 +115,7 @@ class MainActivity : ComponentActivity() { | @@ -119,7 +115,7 @@ class MainActivity : ComponentActivity() { | ||
| 119 | Button( | 115 | Button( |
| 120 | modifier = Modifier.padding(20.dp), | 116 | modifier = Modifier.padding(20.dp), |
| 121 | onClick = { | 117 | onClick = { |
| 122 | - Log.i(TAG, "Clicked, text: ${testText}") | 118 | + Log.i(TAG, "Clicked, text: $testText") |
| 123 | if (testText.isBlank() || testText.isEmpty()) { | 119 | if (testText.isBlank() || testText.isEmpty()) { |
| 124 | Toast.makeText( | 120 | Toast.makeText( |
| 125 | applicationContext, | 121 | applicationContext, |
| @@ -136,7 +132,7 @@ class MainActivity : ComponentActivity() { | @@ -136,7 +132,7 @@ class MainActivity : ComponentActivity() { | ||
| 136 | val filename = | 132 | val filename = |
| 137 | application.filesDir.absolutePath + "/generated.wav" | 133 | application.filesDir.absolutePath + "/generated.wav" |
| 138 | val ok = | 134 | val ok = |
| 139 | - audio.samples.size > 0 && audio.save(filename) | 135 | + audio.samples.isNotEmpty() && audio.save(filename) |
| 140 | 136 | ||
| 141 | if (ok) { | 137 | if (ok) { |
| 142 | stopMediaPlayer() | 138 | stopMediaPlayer() |
| @@ -4,8 +4,10 @@ import android.content.Context | @@ -4,8 +4,10 @@ import android.content.Context | ||
| 4 | import android.content.res.AssetManager | 4 | import android.content.res.AssetManager |
| 5 | import android.util.Log | 5 | import android.util.Log |
| 6 | import androidx.compose.runtime.MutableState | 6 | import androidx.compose.runtime.MutableState |
| 7 | -import androidx.compose.runtime.mutableStateOf | ||
| 8 | -import com.k2fsa.sherpa.onnx.* | 7 | +import androidx.compose.runtime.mutableFloatStateOf |
| 8 | +import androidx.compose.runtime.mutableIntStateOf | ||
| 9 | +import com.k2fsa.sherpa.onnx.OfflineTts | ||
| 10 | +import com.k2fsa.sherpa.onnx.getOfflineTtsConfig | ||
| 9 | import java.io.File | 11 | import java.io.File |
| 10 | import java.io.FileOutputStream | 12 | import java.io.FileOutputStream |
| 11 | import java.io.IOException | 13 | import java.io.IOException |
| @@ -21,8 +23,8 @@ object TtsEngine { | @@ -21,8 +23,8 @@ object TtsEngine { | ||
| 21 | var lang: String? = null | 23 | var lang: String? = null |
| 22 | 24 | ||
| 23 | 25 | ||
| 24 | - val speedState: MutableState<Float> = mutableStateOf(1.0F) | ||
| 25 | - val speakerIdState: MutableState<Int> = mutableStateOf(0) | 26 | + val speedState: MutableState<Float> = mutableFloatStateOf(1.0F) |
| 27 | + val speakerIdState: MutableState<Int> = mutableIntStateOf(0) | ||
| 26 | 28 | ||
| 27 | var speed: Float | 29 | var speed: Float |
| 28 | get() = speedState.value | 30 | get() = speedState.value |
| @@ -113,15 +115,15 @@ object TtsEngine { | @@ -113,15 +115,15 @@ object TtsEngine { | ||
| 113 | 115 | ||
| 114 | if (dataDir != null) { | 116 | if (dataDir != null) { |
| 115 | val newDir = copyDataDir(context, modelDir!!) | 117 | val newDir = copyDataDir(context, modelDir!!) |
| 116 | - modelDir = newDir + "/" + modelDir | ||
| 117 | - dataDir = newDir + "/" + dataDir | 118 | + modelDir = "$newDir/$modelDir" |
| 119 | + dataDir = "$newDir/$dataDir" | ||
| 118 | assets = null | 120 | assets = null |
| 119 | } | 121 | } |
| 120 | 122 | ||
| 121 | if (dictDir != null) { | 123 | if (dictDir != null) { |
| 122 | val newDir = copyDataDir(context, modelDir!!) | 124 | val newDir = copyDataDir(context, modelDir!!) |
| 123 | - modelDir = newDir + "/" + modelDir | ||
| 124 | - dictDir = modelDir + "/" + "dict" | 125 | + modelDir = "$newDir/$modelDir" |
| 126 | + dictDir = "$modelDir/dict" | ||
| 125 | ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" | 127 | ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst" |
| 126 | assets = null | 128 | assets = null |
| 127 | } | 129 | } |
| @@ -132,18 +134,18 @@ object TtsEngine { | @@ -132,18 +134,18 @@ object TtsEngine { | ||
| 132 | dictDir = dictDir ?: "", | 134 | dictDir = dictDir ?: "", |
| 133 | ruleFsts = ruleFsts ?: "", | 135 | ruleFsts = ruleFsts ?: "", |
| 134 | ruleFars = ruleFars ?: "" | 136 | ruleFars = ruleFars ?: "" |
| 135 | - )!! | 137 | + ) |
| 136 | 138 | ||
| 137 | tts = OfflineTts(assetManager = assets, config = config) | 139 | tts = OfflineTts(assetManager = assets, config = config) |
| 138 | } | 140 | } |
| 139 | 141 | ||
| 140 | 142 | ||
| 141 | private fun copyDataDir(context: Context, dataDir: String): String { | 143 | private fun copyDataDir(context: Context, dataDir: String): String { |
| 142 | - println("data dir is $dataDir") | 144 | + Log.i(TAG, "data dir is $dataDir") |
| 143 | copyAssets(context, dataDir) | 145 | copyAssets(context, dataDir) |
| 144 | 146 | ||
| 145 | val newDataDir = context.getExternalFilesDir(null)!!.absolutePath | 147 | val newDataDir = context.getExternalFilesDir(null)!!.absolutePath |
| 146 | - println("newDataDir: $newDataDir") | 148 | + Log.i(TAG, "newDataDir: $newDataDir") |
| 147 | return newDataDir | 149 | return newDataDir |
| 148 | } | 150 | } |
| 149 | 151 | ||
| @@ -158,12 +160,12 @@ object TtsEngine { | @@ -158,12 +160,12 @@ object TtsEngine { | ||
| 158 | val dir = File(fullPath) | 160 | val dir = File(fullPath) |
| 159 | dir.mkdirs() | 161 | dir.mkdirs() |
| 160 | for (asset in assets.iterator()) { | 162 | for (asset in assets.iterator()) { |
| 161 | - val p: String = if (path == "") "" else path + "/" | 163 | + val p: String = if (path == "") "" else "$path/" |
| 162 | copyAssets(context, p + asset) | 164 | copyAssets(context, p + asset) |
| 163 | } | 165 | } |
| 164 | } | 166 | } |
| 165 | } catch (ex: IOException) { | 167 | } catch (ex: IOException) { |
| 166 | - Log.e(TAG, "Failed to copy $path. ${ex.toString()}") | 168 | + Log.e(TAG, "Failed to copy $path. $ex") |
| 167 | } | 169 | } |
| 168 | } | 170 | } |
| 169 | 171 | ||
| @@ -183,7 +185,7 @@ object TtsEngine { | @@ -183,7 +185,7 @@ object TtsEngine { | ||
| 183 | ostream.flush() | 185 | ostream.flush() |
| 184 | ostream.close() | 186 | ostream.close() |
| 185 | } catch (ex: Exception) { | 187 | } catch (ex: Exception) { |
| 186 | - Log.e(TAG, "Failed to copy $filename, ${ex.toString()}") | 188 | + Log.e(TAG, "Failed to copy $filename, $ex") |
| 187 | } | 189 | } |
| 188 | } | 190 | } |
| 189 | } | 191 | } |
| @@ -6,7 +6,6 @@ import android.speech.tts.SynthesisRequest | @@ -6,7 +6,6 @@ import android.speech.tts.SynthesisRequest | ||
| 6 | import android.speech.tts.TextToSpeech | 6 | import android.speech.tts.TextToSpeech |
| 7 | import android.speech.tts.TextToSpeechService | 7 | import android.speech.tts.TextToSpeechService |
| 8 | import android.util.Log | 8 | import android.util.Log |
| 9 | -import com.k2fsa.sherpa.onnx.* | ||
| 10 | 9 | ||
| 11 | /* | 10 | /* |
| 12 | https://developer.android.com/reference/java/util/Locale#getISO3Language() | 11 | https://developer.android.com/reference/java/util/Locale#getISO3Language() |
| 1 | package com.k2fsa.sherpa.onnx.tts.engine | 1 | package com.k2fsa.sherpa.onnx.tts.engine |
| 2 | 2 | ||
| 3 | import android.app.Application | 3 | import android.app.Application |
| 4 | -import android.os.FileUtils.ProgressListener | ||
| 5 | import android.speech.tts.TextToSpeech | 4 | import android.speech.tts.TextToSpeech |
| 6 | import android.speech.tts.TextToSpeech.OnInitListener | 5 | import android.speech.tts.TextToSpeech.OnInitListener |
| 7 | import android.speech.tts.UtteranceProgressListener | 6 | import android.speech.tts.UtteranceProgressListener |
| @@ -27,7 +26,7 @@ class TtsViewModel : ViewModel() { | @@ -27,7 +26,7 @@ class TtsViewModel : ViewModel() { | ||
| 27 | private val onInitListener = object : OnInitListener { | 26 | private val onInitListener = object : OnInitListener { |
| 28 | override fun onInit(status: Int) { | 27 | override fun onInit(status: Int) { |
| 29 | when (status) { | 28 | when (status) { |
| 30 | - TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeded") | 29 | + TextToSpeech.SUCCESS -> Log.i(TAG, "Init tts succeeded") |
| 31 | TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed") | 30 | TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed") |
| 32 | else -> Log.i(TAG, "Unknown status $status") | 31 | else -> Log.i(TAG, "Unknown status $status") |
| 33 | } | 32 | } |
| @@ -15,7 +15,7 @@ | @@ -15,7 +15,7 @@ | ||
| 15 | android:theme="@style/Theme.SherpaOnnxVad" | 15 | android:theme="@style/Theme.SherpaOnnxVad" |
| 16 | tools:targetApi="31"> | 16 | tools:targetApi="31"> |
| 17 | <activity | 17 | <activity |
| 18 | - android:name=".MainActivity" | 18 | + android:name="com.k2fsa.sherpa.onnx.vad.MainActivity" |
| 19 | android:exported="true"> | 19 | android:exported="true"> |
| 20 | <intent-filter> | 20 | <intent-filter> |
| 21 | <action android:name="android.intent.action.MAIN" /> | 21 | <action android:name="android.intent.action.MAIN" /> |
| 1 | -package com.k2fsa.sherpa.onnx | 1 | +package com.k2fsa.sherpa.onnx.vad |
| 2 | 2 | ||
| 3 | import android.Manifest | 3 | import android.Manifest |
| 4 | import android.content.pm.PackageManager | 4 | import android.content.pm.PackageManager |
| @@ -11,6 +11,9 @@ import android.view.View | @@ -11,6 +11,9 @@ import android.view.View | ||
| 11 | import android.widget.Button | 11 | import android.widget.Button |
| 12 | import androidx.appcompat.app.AppCompatActivity | 12 | import androidx.appcompat.app.AppCompatActivity |
| 13 | import androidx.core.app.ActivityCompat | 13 | import androidx.core.app.ActivityCompat |
| 14 | +import com.k2fsa.sherpa.onnx.R | ||
| 15 | +import com.k2fsa.sherpa.onnx.Vad | ||
| 16 | +import com.k2fsa.sherpa.onnx.getVadModelConfig | ||
| 14 | import kotlin.concurrent.thread | 17 | import kotlin.concurrent.thread |
| 15 | 18 | ||
| 16 | 19 | ||
| @@ -116,7 +119,7 @@ class MainActivity : AppCompatActivity() { | @@ -116,7 +119,7 @@ class MainActivity : AppCompatActivity() { | ||
| 116 | 119 | ||
| 117 | private fun initVadModel() { | 120 | private fun initVadModel() { |
| 118 | val type = 0 | 121 | val type = 0 |
| 119 | - println("Select VAD model type ${type}") | 122 | + Log.i(TAG, "Select VAD model type ${type}") |
| 120 | val config = getVadModelConfig(type) | 123 | val config = getVadModelConfig(type) |
| 121 | 124 | ||
| 122 | vad = Vad( | 125 | vad = Vad( |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/Vad.kt |
| @@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
| 4 | xmlns:tools="http://schemas.android.com/tools" | 4 | xmlns:tools="http://schemas.android.com/tools" |
| 5 | android:layout_width="match_parent" | 5 | android:layout_width="match_parent" |
| 6 | android:layout_height="match_parent" | 6 | android:layout_height="match_parent" |
| 7 | - tools:context=".MainActivity"> | 7 | + tools:context="com.k2fsa.sherpa.onnx.vad.MainActivity"> |
| 8 | <LinearLayout | 8 | <LinearLayout |
| 9 | android:layout_width="match_parent" | 9 | android:layout_width="match_parent" |
| 10 | android:layout_height="match_parent" | 10 | android:layout_height="match_parent" |
| @@ -15,7 +15,7 @@ | @@ -15,7 +15,7 @@ | ||
| 15 | android:theme="@style/Theme.SherpaOnnxVadAsr" | 15 | android:theme="@style/Theme.SherpaOnnxVadAsr" |
| 16 | tools:targetApi="31"> | 16 | tools:targetApi="31"> |
| 17 | <activity | 17 | <activity |
| 18 | - android:name=".MainActivity" | 18 | + android:name=".vad.asr.MainActivity" |
| 19 | android:exported="true"> | 19 | android:exported="true"> |
| 20 | <intent-filter> | 20 | <intent-filter> |
| 21 | <action android:name="android.intent.action.MAIN" /> | 21 | <action android:name="android.intent.action.MAIN" /> |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/FeatureConfig.kt |
| 1 | -package com.k2fsa.sherpa.onnx | 1 | +package com.k2fsa.sherpa.onnx.vad.asr |
| 2 | 2 | ||
| 3 | import android.Manifest | 3 | import android.Manifest |
| 4 | import android.content.pm.PackageManager | 4 | import android.content.pm.PackageManager |
| @@ -13,6 +13,13 @@ import android.widget.Button | @@ -13,6 +13,13 @@ import android.widget.Button | ||
| 13 | import android.widget.TextView | 13 | import android.widget.TextView |
| 14 | import androidx.appcompat.app.AppCompatActivity | 14 | import androidx.appcompat.app.AppCompatActivity |
| 15 | import androidx.core.app.ActivityCompat | 15 | import androidx.core.app.ActivityCompat |
| 16 | +import com.k2fsa.sherpa.onnx.OfflineRecognizer | ||
| 17 | +import com.k2fsa.sherpa.onnx.OfflineRecognizerConfig | ||
| 18 | +import com.k2fsa.sherpa.onnx.R | ||
| 19 | +import com.k2fsa.sherpa.onnx.Vad | ||
| 20 | +import com.k2fsa.sherpa.onnx.getFeatureConfig | ||
| 21 | +import com.k2fsa.sherpa.onnx.getOfflineModelConfig | ||
| 22 | +import com.k2fsa.sherpa.onnx.getVadModelConfig | ||
| 16 | import kotlin.concurrent.thread | 23 | import kotlin.concurrent.thread |
| 17 | 24 | ||
| 18 | 25 | ||
| @@ -40,7 +47,7 @@ class MainActivity : AppCompatActivity() { | @@ -40,7 +47,7 @@ class MainActivity : AppCompatActivity() { | ||
| 40 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) | 47 | private val permissions: Array<String> = arrayOf(Manifest.permission.RECORD_AUDIO) |
| 41 | 48 | ||
| 42 | // Non-streaming ASR | 49 | // Non-streaming ASR |
| 43 | - private lateinit var offlineRecognizer: SherpaOnnxOffline | 50 | + private lateinit var offlineRecognizer: OfflineRecognizer |
| 44 | 51 | ||
| 45 | private var idx: Int = 0 | 52 | private var idx: Int = 0 |
| 46 | private var lastText: String = "" | 53 | private var lastText: String = "" |
| @@ -122,7 +129,7 @@ class MainActivity : AppCompatActivity() { | @@ -122,7 +129,7 @@ class MainActivity : AppCompatActivity() { | ||
| 122 | 129 | ||
| 123 | private fun initVadModel() { | 130 | private fun initVadModel() { |
| 124 | val type = 0 | 131 | val type = 0 |
| 125 | - println("Select VAD model type ${type}") | 132 | + Log.i(TAG, "Select VAD model type ${type}") |
| 126 | val config = getVadModelConfig(type) | 133 | val config = getVadModelConfig(type) |
| 127 | 134 | ||
| 128 | vad = Vad( | 135 | vad = Vad( |
| @@ -194,20 +201,25 @@ class MainActivity : AppCompatActivity() { | @@ -194,20 +201,25 @@ class MainActivity : AppCompatActivity() { | ||
| 194 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | 201 | // See https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html |
| 195 | // for a list of available models | 202 | // for a list of available models |
| 196 | val secondType = 0 | 203 | val secondType = 0 |
| 197 | - println("Select model type ${secondType} for the second pass") | 204 | + Log.i(TAG, "Select model type ${secondType} for the second pass") |
| 198 | 205 | ||
| 199 | val config = OfflineRecognizerConfig( | 206 | val config = OfflineRecognizerConfig( |
| 200 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), | 207 | featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), |
| 201 | modelConfig = getOfflineModelConfig(type = secondType)!!, | 208 | modelConfig = getOfflineModelConfig(type = secondType)!!, |
| 202 | ) | 209 | ) |
| 203 | 210 | ||
| 204 | - offlineRecognizer = SherpaOnnxOffline( | 211 | + offlineRecognizer = OfflineRecognizer( |
| 205 | assetManager = application.assets, | 212 | assetManager = application.assets, |
| 206 | config = config, | 213 | config = config, |
| 207 | ) | 214 | ) |
| 208 | } | 215 | } |
| 209 | 216 | ||
| 210 | private fun runSecondPass(samples: FloatArray): String { | 217 | private fun runSecondPass(samples: FloatArray): String { |
| 211 | - return offlineRecognizer.decode(samples, sampleRateInHz) | 218 | + val stream = offlineRecognizer.createStream() |
| 219 | + stream.acceptWaveform(samples, sampleRateInHz) | ||
| 220 | + offlineRecognizer.decode(stream) | ||
| 221 | + val result = offlineRecognizer.getResult(stream) | ||
| 222 | + stream.release() | ||
| 223 | + return result.text | ||
| 212 | } | 224 | } |
| 213 | } | 225 | } |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineRecognizer.kt |
| 1 | +../../../../../../../../../../sherpa-onnx/kotlin-api/OfflineStream.kt |
| 1 | -../../../../../../../../../SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt |
| @@ -4,7 +4,7 @@ | @@ -4,7 +4,7 @@ | ||
| 4 | xmlns:tools="http://schemas.android.com/tools" | 4 | xmlns:tools="http://schemas.android.com/tools" |
| 5 | android:layout_width="match_parent" | 5 | android:layout_width="match_parent" |
| 6 | android:layout_height="match_parent" | 6 | android:layout_height="match_parent" |
| 7 | - tools:context=".MainActivity"> | 7 | + tools:context=".vad.asr.MainActivity"> |
| 8 | 8 | ||
| 9 | <LinearLayout | 9 | <LinearLayout |
| 10 | android:layout_width="match_parent" | 10 | android:layout_width="match_parent" |
| 1 | <resources> | 1 | <resources> |
| 2 | - <string name="app_name">VAD-ASR</string> | 2 | + <string name="app_name">VAD+ASR</string> |
| 3 | <string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi. | 3 | <string name="hint">Click the Start button to play speech-to-text with Next-gen Kaldi. |
| 4 | \n | 4 | \n |
| 5 | \n\n\n | 5 | \n\n\n |
| @@ -59,7 +59,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | @@ -59,7 +59,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | ||
| 59 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" | 59 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" |
| 60 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" | 60 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" |
| 61 | 61 | ||
| 62 | +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then | ||
| 63 | + SHERPA_ONNX_ENABLE_TTS=ON | ||
| 64 | +fi | ||
| 65 | + | ||
| 66 | +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then | ||
| 67 | + SHERPA_ONNX_ENABLE_BINARY=OFF | ||
| 68 | +fi | ||
| 69 | + | ||
| 62 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ | 70 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ |
| 71 | + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \ | ||
| 72 | + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \ | ||
| 63 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ | 73 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ |
| 64 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ | 74 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ |
| 65 | -DBUILD_ESPEAK_NG_EXE=OFF \ | 75 | -DBUILD_ESPEAK_NG_EXE=OFF \ |
| @@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | @@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | ||
| 60 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" | 60 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" |
| 61 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" | 61 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" |
| 62 | 62 | ||
| 63 | +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then | ||
| 64 | + SHERPA_ONNX_ENABLE_TTS=ON | ||
| 65 | +fi | ||
| 66 | + | ||
| 67 | +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then | ||
| 68 | + SHERPA_ONNX_ENABLE_BINARY=OFF | ||
| 69 | +fi | ||
| 70 | + | ||
| 63 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ | 71 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ |
| 72 | + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \ | ||
| 73 | + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \ | ||
| 64 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ | 74 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ |
| 65 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ | 75 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ |
| 66 | -DBUILD_ESPEAK_NG_EXE=OFF \ | 76 | -DBUILD_ESPEAK_NG_EXE=OFF \ |
| @@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | @@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | ||
| 60 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" | 60 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" |
| 61 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" | 61 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" |
| 62 | 62 | ||
| 63 | +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then | ||
| 64 | + SHERPA_ONNX_ENABLE_TTS=ON | ||
| 65 | +fi | ||
| 66 | + | ||
| 67 | +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then | ||
| 68 | + SHERPA_ONNX_ENABLE_BINARY=OFF | ||
| 69 | +fi | ||
| 70 | + | ||
| 63 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ | 71 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ |
| 72 | + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \ | ||
| 73 | + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \ | ||
| 64 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ | 74 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ |
| 65 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ | 75 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ |
| 66 | -DBUILD_ESPEAK_NG_EXE=OFF \ | 76 | -DBUILD_ESPEAK_NG_EXE=OFF \ |
| @@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | @@ -60,7 +60,17 @@ export SHERPA_ONNXRUNTIME_INCLUDE_DIR=$dir/$onnxruntime_version/headers/ | ||
| 60 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" | 60 | echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR" |
| 61 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" | 61 | echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR" |
| 62 | 62 | ||
| 63 | +if [ -z $SHERPA_ONNX_ENABLE_TTS ]; then | ||
| 64 | + SHERPA_ONNX_ENABLE_TTS=ON | ||
| 65 | +fi | ||
| 66 | + | ||
| 67 | +if [ -z $SHERPA_ONNX_ENABLE_BINARY ]; then | ||
| 68 | + SHERPA_ONNX_ENABLE_BINARY=OFF | ||
| 69 | +fi | ||
| 70 | + | ||
| 63 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ | 71 | cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \ |
| 72 | + -DSHERPA_ONNX_ENABLE_TTS=$SHERPA_ONNX_ENABLE_TTS \ | ||
| 73 | + -DSHERPA_ONNX_ENABLE_BINARY=$SHERPA_ONNX_ENABLE_BINARY \ | ||
| 64 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ | 74 | -DBUILD_PIPER_PHONMIZE_EXE=OFF \ |
| 65 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ | 75 | -DBUILD_PIPER_PHONMIZE_TESTS=OFF \ |
| 66 | -DBUILD_ESPEAK_NG_EXE=OFF \ | 76 | -DBUILD_ESPEAK_NG_EXE=OFF \ |
kotlin-api-examples/FeatureConfig.kt
0 → 120000
| 1 | +../sherpa-onnx/kotlin-api/FeatureConfig.kt |
kotlin-api-examples/Main.kt
已删除
100644 → 0
| 1 | -package com.k2fsa.sherpa.onnx | ||
| 2 | - | ||
| 3 | -import android.content.res.AssetManager | ||
| 4 | - | ||
| 5 | -fun callback(samples: FloatArray): Unit { | ||
| 6 | - println("callback got called with ${samples.size} samples"); | ||
| 7 | -} | ||
| 8 | - | ||
| 9 | -fun main() { | ||
| 10 | - testSpokenLanguageIdentifcation() | ||
| 11 | - testAudioTagging() | ||
| 12 | - testSpeakerRecognition() | ||
| 13 | - testTts() | ||
| 14 | - testAsr("transducer") | ||
| 15 | - testAsr("zipformer2-ctc") | ||
| 16 | -} | ||
| 17 | - | ||
| 18 | -fun testSpokenLanguageIdentifcation() { | ||
| 19 | - val config = SpokenLanguageIdentificationConfig( | ||
| 20 | - whisper = SpokenLanguageIdentificationWhisperConfig( | ||
| 21 | - encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx", | ||
| 22 | - decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx", | ||
| 23 | - tailPaddings = 33, | ||
| 24 | - ), | ||
| 25 | - numThreads=1, | ||
| 26 | - debug=true, | ||
| 27 | - provider="cpu", | ||
| 28 | - ) | ||
| 29 | - val slid = SpokenLanguageIdentification(assetManager=null, config=config) | ||
| 30 | - | ||
| 31 | - val testFiles = arrayOf( | ||
| 32 | - "./spoken-language-identification-test-wavs/ar-arabic.wav", | ||
| 33 | - "./spoken-language-identification-test-wavs/bg-bulgarian.wav", | ||
| 34 | - "./spoken-language-identification-test-wavs/de-german.wav", | ||
| 35 | - ) | ||
| 36 | - | ||
| 37 | - for (waveFilename in testFiles) { | ||
| 38 | - val objArray = WaveReader.readWaveFromFile( | ||
| 39 | - filename = waveFilename, | ||
| 40 | - ) | ||
| 41 | - val samples: FloatArray = objArray[0] as FloatArray | ||
| 42 | - val sampleRate: Int = objArray[1] as Int | ||
| 43 | - | ||
| 44 | - val stream = slid.createStream() | ||
| 45 | - stream.acceptWaveform(samples, sampleRate = sampleRate) | ||
| 46 | - val lang = slid.compute(stream) | ||
| 47 | - stream.release() | ||
| 48 | - println(waveFilename) | ||
| 49 | - println(lang) | ||
| 50 | - } | ||
| 51 | -} | ||
| 52 | - | ||
| 53 | -fun testAudioTagging() { | ||
| 54 | - val config = AudioTaggingConfig( | ||
| 55 | - model=AudioTaggingModelConfig( | ||
| 56 | - zipformer=OfflineZipformerAudioTaggingModelConfig( | ||
| 57 | - model="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx", | ||
| 58 | - ), | ||
| 59 | - numThreads=1, | ||
| 60 | - debug=true, | ||
| 61 | - provider="cpu", | ||
| 62 | - ), | ||
| 63 | - labels="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv", | ||
| 64 | - topK=5, | ||
| 65 | - ) | ||
| 66 | - val tagger = AudioTagging(assetManager=null, config=config) | ||
| 67 | - | ||
| 68 | - val testFiles = arrayOf( | ||
| 69 | - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav", | ||
| 70 | - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/2.wav", | ||
| 71 | - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/3.wav", | ||
| 72 | - "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/4.wav", | ||
| 73 | - ) | ||
| 74 | - println("----------") | ||
| 75 | - for (waveFilename in testFiles) { | ||
| 76 | - val stream = tagger.createStream() | ||
| 77 | - | ||
| 78 | - val objArray = WaveReader.readWaveFromFile( | ||
| 79 | - filename = waveFilename, | ||
| 80 | - ) | ||
| 81 | - val samples: FloatArray = objArray[0] as FloatArray | ||
| 82 | - val sampleRate: Int = objArray[1] as Int | ||
| 83 | - | ||
| 84 | - stream.acceptWaveform(samples, sampleRate = sampleRate) | ||
| 85 | - val events = tagger.compute(stream) | ||
| 86 | - stream.release() | ||
| 87 | - | ||
| 88 | - println(waveFilename) | ||
| 89 | - println(events) | ||
| 90 | - println("----------") | ||
| 91 | - } | ||
| 92 | - | ||
| 93 | - tagger.release() | ||
| 94 | -} | ||
| 95 | - | ||
| 96 | -fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray { | ||
| 97 | - var objArray = WaveReader.readWaveFromFile( | ||
| 98 | - filename = filename, | ||
| 99 | - ) | ||
| 100 | - var samples: FloatArray = objArray[0] as FloatArray | ||
| 101 | - var sampleRate: Int = objArray[1] as Int | ||
| 102 | - | ||
| 103 | - val stream = extractor.createStream() | ||
| 104 | - stream.acceptWaveform(sampleRate = sampleRate, samples=samples) | ||
| 105 | - stream.inputFinished() | ||
| 106 | - check(extractor.isReady(stream)) | ||
| 107 | - | ||
| 108 | - val embedding = extractor.compute(stream) | ||
| 109 | - | ||
| 110 | - stream.release() | ||
| 111 | - | ||
| 112 | - return embedding | ||
| 113 | -} | ||
| 114 | - | ||
| 115 | -fun testSpeakerRecognition() { | ||
| 116 | - val config = SpeakerEmbeddingExtractorConfig( | ||
| 117 | - model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx", | ||
| 118 | - ) | ||
| 119 | - val extractor = SpeakerEmbeddingExtractor(config = config) | ||
| 120 | - | ||
| 121 | - val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav") | ||
| 122 | - val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav") | ||
| 123 | - val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav") | ||
| 124 | - | ||
| 125 | - var manager = SpeakerEmbeddingManager(extractor.dim()) | ||
| 126 | - var ok = manager.add(name = "speaker1", embedding=embedding1a) | ||
| 127 | - check(ok) | ||
| 128 | - | ||
| 129 | - manager.add(name = "speaker2", embedding=embedding2a) | ||
| 130 | - check(ok) | ||
| 131 | - | ||
| 132 | - var name = manager.search(embedding=embedding1b, threshold=0.5f) | ||
| 133 | - check(name == "speaker1") | ||
| 134 | - | ||
| 135 | - manager.release() | ||
| 136 | - | ||
| 137 | - manager = SpeakerEmbeddingManager(extractor.dim()) | ||
| 138 | - val embeddingList = mutableListOf(embedding1a, embedding1b) | ||
| 139 | - ok = manager.add(name = "s1", embedding=embeddingList.toTypedArray()) | ||
| 140 | - check(ok) | ||
| 141 | - | ||
| 142 | - name = manager.search(embedding=embedding1b, threshold=0.5f) | ||
| 143 | - check(name == "s1") | ||
| 144 | - | ||
| 145 | - name = manager.search(embedding=embedding2a, threshold=0.5f) | ||
| 146 | - check(name.length == 0) | ||
| 147 | - | ||
| 148 | - manager.release() | ||
| 149 | -} | ||
| 150 | - | ||
| 151 | -fun testTts() { | ||
| 152 | - // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | ||
| 153 | - // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | ||
| 154 | - var config = OfflineTtsConfig( | ||
| 155 | - model=OfflineTtsModelConfig( | ||
| 156 | - vits=OfflineTtsVitsModelConfig( | ||
| 157 | - model="./vits-piper-en_US-amy-low/en_US-amy-low.onnx", | ||
| 158 | - tokens="./vits-piper-en_US-amy-low/tokens.txt", | ||
| 159 | - dataDir="./vits-piper-en_US-amy-low/espeak-ng-data", | ||
| 160 | - ), | ||
| 161 | - numThreads=1, | ||
| 162 | - debug=true, | ||
| 163 | - ) | ||
| 164 | - ) | ||
| 165 | - val tts = OfflineTts(config=config) | ||
| 166 | - val audio = tts.generateWithCallback(text="“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”", callback=::callback) | ||
| 167 | - audio.save(filename="test-en.wav") | ||
| 168 | -} | ||
| 169 | - | ||
| 170 | -fun testAsr(type: String) { | ||
| 171 | - var featConfig = FeatureConfig( | ||
| 172 | - sampleRate = 16000, | ||
| 173 | - featureDim = 80, | ||
| 174 | - ) | ||
| 175 | - | ||
| 176 | - var waveFilename: String | ||
| 177 | - var modelConfig: OnlineModelConfig = when (type) { | ||
| 178 | - "transducer" -> { | ||
| 179 | - waveFilename = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/test_wavs/0.wav" | ||
| 180 | - // please refer to | ||
| 181 | - // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 182 | - // to dowload pre-trained models | ||
| 183 | - OnlineModelConfig( | ||
| 184 | - transducer = OnlineTransducerModelConfig( | ||
| 185 | - encoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/encoder-epoch-99-avg-1.onnx", | ||
| 186 | - decoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/decoder-epoch-99-avg-1.onnx", | ||
| 187 | - joiner = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/joiner-epoch-99-avg-1.onnx", | ||
| 188 | - ), | ||
| 189 | - tokens = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt", | ||
| 190 | - numThreads = 1, | ||
| 191 | - debug = false, | ||
| 192 | - ) | ||
| 193 | - } | ||
| 194 | - "zipformer2-ctc" -> { | ||
| 195 | - waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav" | ||
| 196 | - OnlineModelConfig( | ||
| 197 | - zipformer2Ctc = OnlineZipformer2CtcModelConfig( | ||
| 198 | - model = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx", | ||
| 199 | - ), | ||
| 200 | - tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt", | ||
| 201 | - numThreads = 1, | ||
| 202 | - debug = false, | ||
| 203 | - ) | ||
| 204 | - } | ||
| 205 | - else -> throw IllegalArgumentException(type) | ||
| 206 | - } | ||
| 207 | - | ||
| 208 | - var endpointConfig = EndpointConfig() | ||
| 209 | - | ||
| 210 | - var lmConfig = OnlineLMConfig() | ||
| 211 | - | ||
| 212 | - var config = OnlineRecognizerConfig( | ||
| 213 | - modelConfig = modelConfig, | ||
| 214 | - lmConfig = lmConfig, | ||
| 215 | - featConfig = featConfig, | ||
| 216 | - endpointConfig = endpointConfig, | ||
| 217 | - enableEndpoint = true, | ||
| 218 | - decodingMethod = "greedy_search", | ||
| 219 | - maxActivePaths = 4, | ||
| 220 | - ) | ||
| 221 | - | ||
| 222 | - var model = SherpaOnnx( | ||
| 223 | - config = config, | ||
| 224 | - ) | ||
| 225 | - | ||
| 226 | - var objArray = WaveReader.readWaveFromFile( | ||
| 227 | - filename = waveFilename, | ||
| 228 | - ) | ||
| 229 | - var samples: FloatArray = objArray[0] as FloatArray | ||
| 230 | - var sampleRate: Int = objArray[1] as Int | ||
| 231 | - | ||
| 232 | - model.acceptWaveform(samples, sampleRate = sampleRate) | ||
| 233 | - while (model.isReady()) { | ||
| 234 | - model.decode() | ||
| 235 | - } | ||
| 236 | - | ||
| 237 | - var tailPaddings = FloatArray((sampleRate * 0.5).toInt()) // 0.5 seconds | ||
| 238 | - model.acceptWaveform(tailPaddings, sampleRate = sampleRate) | ||
| 239 | - model.inputFinished() | ||
| 240 | - while (model.isReady()) { | ||
| 241 | - model.decode() | ||
| 242 | - } | ||
| 243 | - | ||
| 244 | - println("results: ${model.text}") | ||
| 245 | -} |
kotlin-api-examples/OfflineRecognizer.kt
0 → 120000
| 1 | +../sherpa-onnx/kotlin-api/OfflineRecognizer.kt |
kotlin-api-examples/OnlineRecognizer.kt
0 → 120000
| 1 | +../sherpa-onnx/kotlin-api/OnlineRecognizer.kt |
kotlin-api-examples/OnlineStream.kt
0 → 120000
| 1 | +../sherpa-onnx/kotlin-api/OnlineStream.kt |
kotlin-api-examples/SherpaOnnx.kt
已删除
120000 → 0
| 1 | -../android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt |
kotlin-api-examples/SherpaOnnx2Pass.kt
已删除
120000 → 0
| 1 | -../android/SherpaOnnx2Pass/app/src/main/java/com/k2fsa/sherpa/onnx/SherpaOnnx.kt |
| @@ -44,9 +44,23 @@ function testSpeakerEmbeddingExtractor() { | @@ -44,9 +44,23 @@ function testSpeakerEmbeddingExtractor() { | ||
| 44 | if [ ! -f ./speaker2_a_cn_16k.wav ]; then | 44 | if [ ! -f ./speaker2_a_cn_16k.wav ]; then |
| 45 | curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav | 45 | curl -SL -O https://github.com/csukuangfj/sr-data/raw/main/test/3d-speaker/speaker2_a_cn_16k.wav |
| 46 | fi | 46 | fi |
| 47 | + | ||
| 48 | + out_filename=test_speaker_id.jar | ||
| 49 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 50 | + test_speaker_id.kt \ | ||
| 51 | + OnlineStream.kt \ | ||
| 52 | + Speaker.kt \ | ||
| 53 | + WaveReader.kt \ | ||
| 54 | + faked-asset-manager.kt \ | ||
| 55 | + faked-log.kt | ||
| 56 | + | ||
| 57 | + ls -lh $out_filename | ||
| 58 | + | ||
| 59 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 47 | } | 60 | } |
| 48 | 61 | ||
| 49 | -function testAsr() { | 62 | + |
| 63 | +function testOnlineAsr() { | ||
| 50 | if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then | 64 | if [ ! -f ./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt ]; then |
| 51 | git lfs install | 65 | git lfs install |
| 52 | git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 | 66 | git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-en-2023-02-21 |
| @@ -57,6 +71,20 @@ function testAsr() { | @@ -57,6 +71,20 @@ function testAsr() { | ||
| 57 | tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | 71 | tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 |
| 58 | rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | 72 | rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 |
| 59 | fi | 73 | fi |
| 74 | + | ||
| 75 | + out_filename=test_online_asr.jar | ||
| 76 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 77 | + test_online_asr.kt \ | ||
| 78 | + FeatureConfig.kt \ | ||
| 79 | + OnlineRecognizer.kt \ | ||
| 80 | + OnlineStream.kt \ | ||
| 81 | + WaveReader.kt \ | ||
| 82 | + faked-asset-manager.kt \ | ||
| 83 | + faked-log.kt | ||
| 84 | + | ||
| 85 | + ls -lh $out_filename | ||
| 86 | + | ||
| 87 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 60 | } | 88 | } |
| 61 | 89 | ||
| 62 | function testTts() { | 90 | function testTts() { |
| @@ -65,16 +93,42 @@ function testTts() { | @@ -65,16 +93,42 @@ function testTts() { | ||
| 65 | tar xf vits-piper-en_US-amy-low.tar.bz2 | 93 | tar xf vits-piper-en_US-amy-low.tar.bz2 |
| 66 | rm vits-piper-en_US-amy-low.tar.bz2 | 94 | rm vits-piper-en_US-amy-low.tar.bz2 |
| 67 | fi | 95 | fi |
| 96 | + | ||
| 97 | + out_filename=test_tts.jar | ||
| 98 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 99 | + test_tts.kt \ | ||
| 100 | + Tts.kt \ | ||
| 101 | + faked-asset-manager.kt \ | ||
| 102 | + faked-log.kt | ||
| 103 | + | ||
| 104 | + ls -lh $out_filename | ||
| 105 | + | ||
| 106 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 68 | } | 107 | } |
| 69 | 108 | ||
| 109 | + | ||
| 70 | function testAudioTagging() { | 110 | function testAudioTagging() { |
| 71 | if [ ! -d sherpa-onnx-zipformer-audio-tagging-2024-04-09 ]; then | 111 | if [ ! -d sherpa-onnx-zipformer-audio-tagging-2024-04-09 ]; then |
| 72 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | 112 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 |
| 73 | tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | 113 | tar xvf sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 |
| 74 | rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 | 114 | rm sherpa-onnx-zipformer-audio-tagging-2024-04-09.tar.bz2 |
| 75 | fi | 115 | fi |
| 116 | + | ||
| 117 | + out_filename=test_audio_tagging.jar | ||
| 118 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 119 | + test_audio_tagging.kt \ | ||
| 120 | + AudioTagging.kt \ | ||
| 121 | + OfflineStream.kt \ | ||
| 122 | + WaveReader.kt \ | ||
| 123 | + faked-asset-manager.kt \ | ||
| 124 | + faked-log.kt | ||
| 125 | + | ||
| 126 | + ls -lh $out_filename | ||
| 127 | + | ||
| 128 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 76 | } | 129 | } |
| 77 | 130 | ||
| 131 | + | ||
| 78 | function testSpokenLanguageIdentification() { | 132 | function testSpokenLanguageIdentification() { |
| 79 | if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then | 133 | if [ ! -f ./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx ]; then |
| 80 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 | 134 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 |
| @@ -87,50 +141,44 @@ function testSpokenLanguageIdentification() { | @@ -87,50 +141,44 @@ function testSpokenLanguageIdentification() { | ||
| 87 | tar xvf spoken-language-identification-test-wavs.tar.bz2 | 141 | tar xvf spoken-language-identification-test-wavs.tar.bz2 |
| 88 | rm spoken-language-identification-test-wavs.tar.bz2 | 142 | rm spoken-language-identification-test-wavs.tar.bz2 |
| 89 | fi | 143 | fi |
| 90 | -} | ||
| 91 | - | ||
| 92 | -function test() { | ||
| 93 | - testSpokenLanguageIdentification | ||
| 94 | - testAudioTagging | ||
| 95 | - testSpeakerEmbeddingExtractor | ||
| 96 | - testAsr | ||
| 97 | - testTts | ||
| 98 | -} | ||
| 99 | - | ||
| 100 | -test | ||
| 101 | 144 | ||
| 102 | -kotlinc-jvm -include-runtime -d main.jar \ | ||
| 103 | - AudioTagging.kt \ | ||
| 104 | - Main.kt \ | ||
| 105 | - OfflineStream.kt \ | ||
| 106 | - SherpaOnnx.kt \ | ||
| 107 | - Speaker.kt \ | 145 | + out_filename=test_language_id.jar |
| 146 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 147 | + test_language_id.kt \ | ||
| 108 | SpokenLanguageIdentification.kt \ | 148 | SpokenLanguageIdentification.kt \ |
| 109 | - Tts.kt \ | 149 | + OfflineStream.kt \ |
| 110 | WaveReader.kt \ | 150 | WaveReader.kt \ |
| 111 | faked-asset-manager.kt \ | 151 | faked-asset-manager.kt \ |
| 112 | faked-log.kt | 152 | faked-log.kt |
| 113 | 153 | ||
| 114 | -ls -lh main.jar | ||
| 115 | - | ||
| 116 | -java -Djava.library.path=../build/lib -jar main.jar | 154 | + ls -lh $out_filename |
| 117 | 155 | ||
| 118 | -function testTwoPass() { | ||
| 119 | - if [ ! -f ./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/encoder-epoch-99-avg-1.int8.onnx ]; then | ||
| 120 | - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2 | ||
| 121 | - tar xvf sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2 | ||
| 122 | - rm sherpa-onnx-streaming-zipformer-en-20M-2023-02-17.tar.bz2 | ||
| 123 | - fi | 156 | + java -Djava.library.path=../build/lib -jar $out_filename |
| 157 | +} | ||
| 124 | 158 | ||
| 159 | +function testOfflineAsr() { | ||
| 125 | if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then | 160 | if [ ! -f ./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx ]; then |
| 126 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 | 161 | curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2 |
| 127 | tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 | 162 | tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2 |
| 128 | rm sherpa-onnx-whisper-tiny.en.tar.bz2 | 163 | rm sherpa-onnx-whisper-tiny.en.tar.bz2 |
| 129 | fi | 164 | fi |
| 130 | 165 | ||
| 131 | - kotlinc-jvm -include-runtime -d 2pass.jar test-2pass.kt WaveReader.kt SherpaOnnx2Pass.kt faked-asset-manager.kt | ||
| 132 | - ls -lh 2pass.jar | ||
| 133 | - java -Djava.library.path=../build/lib -jar 2pass.jar | 166 | + out_filename=test_offline_asr.jar |
| 167 | + kotlinc-jvm -include-runtime -d $out_filename \ | ||
| 168 | + test_offline_asr.kt \ | ||
| 169 | + FeatureConfig.kt \ | ||
| 170 | + OfflineRecognizer.kt \ | ||
| 171 | + OfflineStream.kt \ | ||
| 172 | + WaveReader.kt \ | ||
| 173 | + faked-asset-manager.kt | ||
| 174 | + | ||
| 175 | + ls -lh $out_filename | ||
| 176 | + java -Djava.library.path=../build/lib -jar $out_filename | ||
| 134 | } | 177 | } |
| 135 | 178 | ||
| 136 | -testTwoPass | 179 | +testSpeakerEmbeddingExtractor |
| 180 | +testOnlineAsr | ||
| 181 | +testTts | ||
| 182 | +testAudioTagging | ||
| 183 | +testSpokenLanguageIdentification | ||
| 184 | +testOfflineAsr |
kotlin-api-examples/test-2pass.kt
已删除
100644 → 0
| 1 | -package com.k2fsa.sherpa.onnx | ||
| 2 | - | ||
| 3 | -fun main() { | ||
| 4 | - test2Pass() | ||
| 5 | -} | ||
| 6 | - | ||
| 7 | -fun test2Pass() { | ||
| 8 | - val firstPass = createFirstPass() | ||
| 9 | - val secondPass = createSecondPass() | ||
| 10 | - | ||
| 11 | - val waveFilename = "./sherpa-onnx-streaming-zipformer-en-20M-2023-02-17/test_wavs/0.wav" | ||
| 12 | - | ||
| 13 | - var objArray = WaveReader.readWaveFromFile( | ||
| 14 | - filename = waveFilename, | ||
| 15 | - ) | ||
| 16 | - var samples: FloatArray = objArray[0] as FloatArray | ||
| 17 | - var sampleRate: Int = objArray[1] as Int | ||
| 18 | - | ||
| 19 | - firstPass.acceptWaveform(samples, sampleRate = sampleRate) | ||
| 20 | - while (firstPass.isReady()) { | ||
| 21 | - firstPass.decode() | ||
| 22 | - } | ||
| 23 | - | ||
| 24 | - var text = firstPass.text | ||
| 25 | - println("First pass text: $text") | ||
| 26 | - | ||
| 27 | - text = secondPass.decode(samples, sampleRate) | ||
| 28 | - println("Second pass text: $text") | ||
| 29 | -} | ||
| 30 | - | ||
| 31 | -fun createFirstPass(): SherpaOnnx { | ||
| 32 | - val config = OnlineRecognizerConfig( | ||
| 33 | - featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80), | ||
| 34 | - modelConfig = getModelConfig(type = 1)!!, | ||
| 35 | - endpointConfig = getEndpointConfig(), | ||
| 36 | - enableEndpoint = true, | ||
| 37 | - ) | ||
| 38 | - | ||
| 39 | - return SherpaOnnx(config = config) | ||
| 40 | -} | ||
| 41 | - | ||
| 42 | -fun createSecondPass(): SherpaOnnxOffline { | ||
| 43 | - val config = OfflineRecognizerConfig( | ||
| 44 | - featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80), | ||
| 45 | - modelConfig = getOfflineModelConfig(type = 2)!!, | ||
| 46 | - ) | ||
| 47 | - | ||
| 48 | - return SherpaOnnxOffline(config = config) | ||
| 49 | -} |
kotlin-api-examples/test_audio_tagging.kt
0 → 100644
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + testAudioTagging() | ||
| 5 | +} | ||
| 6 | + | ||
| 7 | +fun testAudioTagging() { | ||
| 8 | + val config = AudioTaggingConfig( | ||
| 9 | + model=AudioTaggingModelConfig( | ||
| 10 | + zipformer=OfflineZipformerAudioTaggingModelConfig( | ||
| 11 | + model="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/model.int8.onnx", | ||
| 12 | + ), | ||
| 13 | + numThreads=1, | ||
| 14 | + debug=true, | ||
| 15 | + provider="cpu", | ||
| 16 | + ), | ||
| 17 | + labels="./sherpa-onnx-zipformer-audio-tagging-2024-04-09/class_labels_indices.csv", | ||
| 18 | + topK=5, | ||
| 19 | + ) | ||
| 20 | + val tagger = AudioTagging(config=config) | ||
| 21 | + | ||
| 22 | + val testFiles = arrayOf( | ||
| 23 | + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/1.wav", | ||
| 24 | + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/2.wav", | ||
| 25 | + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/3.wav", | ||
| 26 | + "./sherpa-onnx-zipformer-audio-tagging-2024-04-09/test_wavs/4.wav", | ||
| 27 | + ) | ||
| 28 | + println("----------") | ||
| 29 | + for (waveFilename in testFiles) { | ||
| 30 | + val stream = tagger.createStream() | ||
| 31 | + | ||
| 32 | + val objArray = WaveReader.readWaveFromFile( | ||
| 33 | + filename = waveFilename, | ||
| 34 | + ) | ||
| 35 | + val samples: FloatArray = objArray[0] as FloatArray | ||
| 36 | + val sampleRate: Int = objArray[1] as Int | ||
| 37 | + | ||
| 38 | + stream.acceptWaveform(samples, sampleRate = sampleRate) | ||
| 39 | + val events = tagger.compute(stream) | ||
| 40 | + stream.release() | ||
| 41 | + | ||
| 42 | + println(waveFilename) | ||
| 43 | + println(events) | ||
| 44 | + println("----------") | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + tagger.release() | ||
| 48 | +} | ||
| 49 | + |
kotlin-api-examples/test_language_id.kt
0 → 100644
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + testSpokenLanguageIdentifcation() | ||
| 5 | +} | ||
| 6 | + | ||
| 7 | +fun testSpokenLanguageIdentifcation() { | ||
| 8 | + val config = SpokenLanguageIdentificationConfig( | ||
| 9 | + whisper = SpokenLanguageIdentificationWhisperConfig( | ||
| 10 | + encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx", | ||
| 11 | + decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx", | ||
| 12 | + tailPaddings = 33, | ||
| 13 | + ), | ||
| 14 | + numThreads=1, | ||
| 15 | + debug=true, | ||
| 16 | + provider="cpu", | ||
| 17 | + ) | ||
| 18 | + val slid = SpokenLanguageIdentification(config=config) | ||
| 19 | + | ||
| 20 | + val testFiles = arrayOf( | ||
| 21 | + "./spoken-language-identification-test-wavs/ar-arabic.wav", | ||
| 22 | + "./spoken-language-identification-test-wavs/bg-bulgarian.wav", | ||
| 23 | + "./spoken-language-identification-test-wavs/de-german.wav", | ||
| 24 | + ) | ||
| 25 | + | ||
| 26 | + for (waveFilename in testFiles) { | ||
| 27 | + val objArray = WaveReader.readWaveFromFile( | ||
| 28 | + filename = waveFilename, | ||
| 29 | + ) | ||
| 30 | + val samples: FloatArray = objArray[0] as FloatArray | ||
| 31 | + val sampleRate: Int = objArray[1] as Int | ||
| 32 | + | ||
| 33 | + val stream = slid.createStream() | ||
| 34 | + stream.acceptWaveform(samples, sampleRate = sampleRate) | ||
| 35 | + val lang = slid.compute(stream) | ||
| 36 | + stream.release() | ||
| 37 | + println(waveFilename) | ||
| 38 | + println(lang) | ||
| 39 | + } | ||
| 40 | + | ||
| 41 | + slid.release() | ||
| 42 | +} | ||
| 43 | + |
kotlin-api-examples/test_offline_asr.kt
0 → 100644
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + val recognizer = createOfflineRecognizer() | ||
| 5 | + // Use a wave file shipped inside the model directory that run.sh downloads for this test | ||
| 6 | + val waveFilename = "./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav" | ||
| 7 | + | ||
| 8 | + val objArray = WaveReader.readWaveFromFile( | ||
| 9 | + filename = waveFilename, | ||
| 10 | + ) | ||
| 11 | + val samples: FloatArray = objArray[0] as FloatArray | ||
| 12 | + val sampleRate: Int = objArray[1] as Int | ||
| 13 | + // Offline decoding: feed the whole recording at once, then decode a single time | ||
| 14 | + val stream = recognizer.createStream() | ||
| 15 | + stream.acceptWaveform(samples, sampleRate=sampleRate) | ||
| 16 | + recognizer.decode(stream) | ||
| 17 | + | ||
| 18 | + val result = recognizer.getResult(stream) | ||
| 19 | + println(result) | ||
| 20 | + // Release the stream and the recognizer explicitly once the result has been printed | ||
| 21 | + stream.release() | ||
| 22 | + recognizer.release() | ||
| 23 | +} | ||
| 24 | + | ||
| 25 | +fun createOfflineRecognizer(): OfflineRecognizer { | ||
| 26 | + val config = OfflineRecognizerConfig( | ||
| 27 | + featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80), | ||
| 28 | + modelConfig = getOfflineModelConfig(type = 2)!!, | ||
| 29 | + ) | ||
| 30 | + | ||
| 31 | + return OfflineRecognizer(config = config) | ||
| 32 | +} |
kotlin-api-examples/test_online_asr.kt
0 → 100644
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + testOnlineAsr("transducer") | ||
| 5 | + testOnlineAsr("zipformer2-ctc") | ||
| 6 | +} | ||
| 7 | + | ||
| 8 | +fun testOnlineAsr(type: String) { | ||
| 9 | + val featConfig = FeatureConfig( | ||
| 10 | + sampleRate = 16000, | ||
| 11 | + featureDim = 80, | ||
| 12 | + ) | ||
| 13 | + | ||
| 14 | + val waveFilename: String | ||
| 15 | + val modelConfig: OnlineModelConfig = when (type) { | ||
| 16 | + "transducer" -> { | ||
| 17 | + waveFilename = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/test_wavs/0.wav" | ||
| 18 | + // please refer to | ||
| 19 | + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 20 | + // to download pre-trained models | ||
| 21 | + OnlineModelConfig( | ||
| 22 | + transducer = OnlineTransducerModelConfig( | ||
| 23 | + encoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/encoder-epoch-99-avg-1.onnx", | ||
| 24 | + decoder = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/decoder-epoch-99-avg-1.onnx", | ||
| 25 | + joiner = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/joiner-epoch-99-avg-1.onnx", | ||
| 26 | + ), | ||
| 27 | + tokens = "./sherpa-onnx-streaming-zipformer-en-2023-02-21/tokens.txt", | ||
| 28 | + numThreads = 1, | ||
| 29 | + debug = false, | ||
| 30 | + ) | ||
| 31 | + } | ||
| 32 | + "zipformer2-ctc" -> { | ||
| 33 | + waveFilename = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav" | ||
| 34 | + OnlineModelConfig( | ||
| 35 | + zipformer2Ctc = OnlineZipformer2CtcModelConfig( | ||
| 36 | + model = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx", | ||
| 37 | + ), | ||
| 38 | + tokens = "./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt", | ||
| 39 | + numThreads = 1, | ||
| 40 | + debug = false, | ||
| 41 | + ) | ||
| 42 | + } | ||
| 43 | + else -> throw IllegalArgumentException(type) | ||
| 44 | + } | ||
| 45 | + | ||
| 46 | + val endpointConfig = EndpointConfig() | ||
| 47 | + | ||
| 48 | + val lmConfig = OnlineLMConfig() | ||
| 49 | + | ||
| 50 | + val config = OnlineRecognizerConfig( | ||
| 51 | + modelConfig = modelConfig, | ||
| 52 | + lmConfig = lmConfig, | ||
| 53 | + featConfig = featConfig, | ||
| 54 | + endpointConfig = endpointConfig, | ||
| 55 | + enableEndpoint = true, | ||
| 56 | + decodingMethod = "greedy_search", | ||
| 57 | + maxActivePaths = 4, | ||
| 58 | + ) | ||
| 59 | + | ||
| 60 | + val recognizer = OnlineRecognizer( | ||
| 61 | + config = config, | ||
| 62 | + ) | ||
| 63 | + | ||
| 64 | + val objArray = WaveReader.readWaveFromFile( | ||
| 65 | + filename = waveFilename, | ||
| 66 | + ) | ||
| 67 | + val samples: FloatArray = objArray[0] as FloatArray | ||
| 68 | + val sampleRate: Int = objArray[1] as Int | ||
| 69 | + | ||
| 70 | + val stream = recognizer.createStream() | ||
| 71 | + stream.acceptWaveform(samples, sampleRate = sampleRate) | ||
| 72 | + while (recognizer.isReady(stream)) { | ||
| 73 | + recognizer.decode(stream) | ||
| 74 | + } | ||
| 75 | + | ||
| 76 | + val tailPaddings = FloatArray((sampleRate * 0.5).toInt()) // 0.5 seconds | ||
| 77 | + stream.acceptWaveform(tailPaddings, sampleRate = sampleRate) | ||
| 78 | + stream.inputFinished() | ||
| 79 | + while (recognizer.isReady(stream)) { | ||
| 80 | + recognizer.decode(stream) | ||
| 81 | + } | ||
| 82 | + | ||
| 83 | + println("results: ${recognizer.getResult(stream).text}") | ||
| 84 | + | ||
| 85 | + stream.release() | ||
| 86 | + recognizer.release() | ||
| 87 | +} |
kotlin-api-examples/test_speaker_id.kt
0 → 100644
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + testSpeakerRecognition() | ||
| 5 | +} | ||
| 6 | + | ||
| 7 | +fun testSpeakerRecognition() { // exercises embedding extraction, enrollment and search | ||
| 8 | + val config = SpeakerEmbeddingExtractorConfig( | ||
| 9 | + model="./3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx", | ||
| 10 | + ) | ||
| 11 | + val extractor = SpeakerEmbeddingExtractor(config = config) | ||
| 12 | + // One embedding per test recording: two files named speaker1_*, one named speaker2_* | ||
| 13 | + val embedding1a = computeEmbedding(extractor, "./speaker1_a_cn_16k.wav") | ||
| 14 | + val embedding2a = computeEmbedding(extractor, "./speaker2_a_cn_16k.wav") | ||
| 15 | + val embedding1b = computeEmbedding(extractor, "./speaker1_b_cn_16k.wav") | ||
| 16 | + | ||
| 17 | + var manager = SpeakerEmbeddingManager(extractor.dim()) | ||
| 18 | + var ok = manager.add(name = "speaker1", embedding=embedding1a) | ||
| 19 | + check(ok) | ||
| 20 | + // Capture the result of this add as well; otherwise the check below re-tests the first add | ||
| 21 | + ok = manager.add(name = "speaker2", embedding=embedding2a) | ||
| 22 | + check(ok) | ||
| 23 | + // The second speaker1 recording was not enrolled, yet it must resolve to "speaker1" | ||
| 24 | + var name = manager.search(embedding=embedding1b, threshold=0.5f) | ||
| 25 | + check(name == "speaker1") | ||
| 26 | + | ||
| 27 | + manager.release() | ||
| 28 | + // Second scenario: a fresh manager with both speaker1 embeddings enrolled under one name | ||
| 29 | + manager = SpeakerEmbeddingManager(extractor.dim()) | ||
| 30 | + val embeddingList = mutableListOf(embedding1a, embedding1b) | ||
| 31 | + ok = manager.add(name = "s1", embedding=embeddingList.toTypedArray()) | ||
| 32 | + check(ok) | ||
| 33 | + | ||
| 34 | + name = manager.search(embedding=embedding1b, threshold=0.5f) | ||
| 35 | + check(name == "s1") | ||
| 36 | + // speaker2 is not enrolled in this manager, so the search must come back with an empty name | ||
| 37 | + name = manager.search(embedding=embedding2a, threshold=0.5f) | ||
| 38 | + check(name.length == 0) | ||
| 39 | + | ||
| 40 | + manager.release() | ||
| 41 | + extractor.release() | ||
| 42 | + println("Speaker ID test done!") | ||
| 43 | +} | ||
| 44 | + | ||
| 45 | +fun computeEmbedding(extractor: SpeakerEmbeddingExtractor, filename: String): FloatArray { | ||
| 46 | + val wave = WaveReader.readWaveFromFile( | ||
| 47 | + filename = filename, | ||
| 48 | + ) | ||
| 49 | + val audio = wave[0] as FloatArray | ||
| 50 | + val rate = wave[1] as Int | ||
| 51 | + // Push the whole recording into a fresh stream and mark the input as finished | ||
| 52 | + val stream = extractor.createStream() | ||
| 53 | + stream.acceptWaveform(sampleRate = rate, samples=audio) | ||
| 54 | + stream.inputFinished() | ||
| 55 | + check(extractor.isReady(stream)) | ||
| 56 | + // The embedding is computed first; the stream is released before it is returned | ||
| 57 | + val embedding = extractor.compute(stream) | ||
| 58 | + | ||
| 59 | + stream.release() | ||
| 60 | + | ||
| 61 | + return embedding | ||
| 62 | +} | ||
kotlin-api-examples/test_tts.kt
0 → 100644
| 1 | +package com.k2fsa.sherpa.onnx | ||
| 2 | + | ||
| 3 | +fun main() { | ||
| 4 | + testTts() | ||
| 5 | +} | ||
| 6 | + | ||
| 7 | +fun testTts() { | ||
| 8 | + // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | ||
| 9 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | ||
| 10 | + var config = OfflineTtsConfig( | ||
| 11 | + model=OfflineTtsModelConfig( | ||
| 12 | + vits=OfflineTtsVitsModelConfig( | ||
| 13 | + model="./vits-piper-en_US-amy-low/en_US-amy-low.onnx", | ||
| 14 | + tokens="./vits-piper-en_US-amy-low/tokens.txt", | ||
| 15 | + dataDir="./vits-piper-en_US-amy-low/espeak-ng-data", | ||
| 16 | + ), | ||
| 17 | + numThreads=1, | ||
| 18 | + debug=true, | ||
| 19 | + ) | ||
| 20 | + ) | ||
| 21 | + val tts = OfflineTts(config=config) | ||
| 22 | + val audio = tts.generateWithCallback(text="“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”", callback=::callback) | ||
| 23 | + audio.save(filename="test-en.wav") | ||
| 24 | + tts.release() | ||
| 25 | + println("Saved to test-en.wav") | ||
| 26 | +} | ||
| 27 | + | ||
| 28 | +fun callback(samples: FloatArray) { // reports the size of each chunk handed to it | ||
| 29 | + println("callback got called with ${samples.size} samples") | ||
| 30 | +} | ||
scripts/apk/build-apk-asr.sh.in
0 → 100644
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# | ||
| 3 | +# Auto generated! Please DO NOT EDIT! | ||
| 4 | + | ||
| 5 | +# Please set the environment variable ANDROID_NDK | ||
| 6 | +# before running this script | ||
| 7 | + | ||
| 8 | +# Inside the $ANDROID_NDK directory, you can find a binary ndk-build | ||
| 9 | +# and some other files like the file "build/cmake/android.toolchain.cmake" | ||
| 10 | + | ||
| 11 | +set -ex | ||
| 12 | + | ||
| 13 | +log() { | ||
| 14 | + # This function is from espnet | ||
| 15 | + local fname=${BASH_SOURCE[1]##*/} | ||
| 16 | + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
| 17 | +} | ||
| 18 | + | ||
| 19 | +SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 20 | + | ||
| 21 | +log "Building streaming ASR APK for sherpa-onnx v${SHERPA_ONNX_VERSION}" | ||
| 22 | + | ||
| 23 | +export SHERPA_ONNX_ENABLE_TTS=OFF | ||
| 24 | + | ||
| 25 | +log "====================arm64-v8a=================" | ||
| 26 | +./build-android-arm64-v8a.sh | ||
| 27 | +log "====================armv7-eabi================" | ||
| 28 | +./build-android-armv7-eabi.sh | ||
| 29 | +log "====================x86-64====================" | ||
| 30 | +./build-android-x86-64.sh | ||
| 31 | +log "====================x86====================" | ||
| 32 | +./build-android-x86.sh | ||
| 33 | + | ||
| 34 | +mkdir -p apks | ||
| 35 | + | ||
| 36 | +{% for model in model_list %} | ||
| 37 | +pushd ./android/SherpaOnnx/app/src/main/assets/ | ||
| 38 | +model_name={{ model.model_name }} | ||
| 39 | +type={{ model.idx }} | ||
| 40 | +lang={{ model.lang }} | ||
| 41 | +short_name={{ model.short_name }} | ||
| 42 | + | ||
| 43 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2 | ||
| 44 | +tar xvf ${model_name}.tar.bz2 | ||
| 45 | + | ||
| 46 | +{{ model.cmd }} | ||
| 47 | + | ||
| 48 | +rm -rf *.tar.bz2 | ||
| 49 | +ls -lh $model_name | ||
| 50 | + | ||
| 51 | +popd | ||
| 52 | +# Now we are at the project root directory | ||
| 53 | + | ||
| 54 | +git checkout . | ||
| 55 | +pushd android/SherpaOnnx/app/src/main/java/com/k2fsa/sherpa/onnx | ||
| 56 | +sed -i.bak s/"type = 0/type = $type/" ./MainActivity.kt | ||
| 57 | +git diff | ||
| 58 | +popd | ||
| 59 | + | ||
| 60 | +for arch in arm64-v8a armeabi-v7a x86_64 x86; do | ||
| 61 | + log "------------------------------------------------------------" | ||
| 62 | + log "build ASR apk for $arch" | ||
| 63 | + log "------------------------------------------------------------" | ||
| 64 | + src_arch=$arch | ||
| 65 | + if [ $arch == "armeabi-v7a" ]; then | ||
| 66 | + src_arch=armv7-eabi | ||
| 67 | + elif [ $arch == "x86_64" ]; then | ||
| 68 | + src_arch=x86-64 | ||
| 69 | + fi | ||
| 70 | + | ||
| 71 | + ls -lh ./build-android-$src_arch/install/lib/*.so | ||
| 72 | + | ||
| 73 | + cp -v ./build-android-$src_arch/install/lib/*.so ./android/SherpaOnnx/app/src/main/jniLibs/$arch/ | ||
| 74 | + | ||
| 75 | + pushd ./android/SherpaOnnx | ||
| 76 | + sed -i.bak s/2048/9012/g ./gradle.properties | ||
| 77 | + git diff ./gradle.properties | ||
| 78 | + ./gradlew assembleRelease | ||
| 79 | + popd | ||
| 80 | + | ||
| 81 | + mv android/SherpaOnnx/app/build/outputs/apk/release/app-release-unsigned.apk ./apks/sherpa-onnx-${SHERPA_ONNX_VERSION}-$arch-asr-$lang-$short_name.apk | ||
| 82 | + ls -lh apks | ||
| 83 | + rm -v ./android/SherpaOnnx/app/src/main/jniLibs/$arch/*.so | ||
| 84 | +done | ||
| 85 | + | ||
| 86 | +rm -rf ./android/SherpaOnnx/app/src/main/assets/$model_name | ||
| 87 | +{% endfor %} | ||
| 88 | + | ||
| 89 | +git checkout . | ||
| 90 | + | ||
| 91 | +ls -lh apks/ |
| @@ -29,6 +29,8 @@ log "====================x86-64====================" | @@ -29,6 +29,8 @@ log "====================x86-64====================" | ||
| 29 | log "====================x86====================" | 29 | log "====================x86====================" |
| 30 | ./build-android-x86.sh | 30 | ./build-android-x86.sh |
| 31 | 31 | ||
| 32 | +export SHERPA_ONNX_ENABLE_TTS=OFF | ||
| 33 | + | ||
| 32 | mkdir -p apks | 34 | mkdir -p apks |
| 33 | 35 | ||
| 34 | {% for model in model_list %} | 36 | {% for model in model_list %} |
| @@ -29,6 +29,8 @@ log "====================x86-64====================" | @@ -29,6 +29,8 @@ log "====================x86-64====================" | ||
| 29 | log "====================x86====================" | 29 | log "====================x86====================" |
| 30 | ./build-android-x86.sh | 30 | ./build-android-x86.sh |
| 31 | 31 | ||
| 32 | +export SHERPA_ONNX_ENABLE_TTS=OFF | ||
| 33 | + | ||
| 32 | mkdir -p apks | 34 | mkdir -p apks |
| 33 | 35 | ||
| 34 | {% for model in model_list %} | 36 | {% for model in model_list %} |
| @@ -29,6 +29,8 @@ log "====================x86-64====================" | @@ -29,6 +29,8 @@ log "====================x86-64====================" | ||
| 29 | log "====================x86====================" | 29 | log "====================x86====================" |
| 30 | ./build-android-x86.sh | 30 | ./build-android-x86.sh |
| 31 | 31 | ||
| 32 | +export SHERPA_ONNX_ENABLE_TTS=OFF | ||
| 33 | + | ||
| 32 | mkdir -p apks | 34 | mkdir -p apks |
| 33 | 35 | ||
| 34 | {% for model in model_list %} | 36 | {% for model in model_list %} |
| @@ -29,6 +29,8 @@ log "====================x86-64====================" | @@ -29,6 +29,8 @@ log "====================x86-64====================" | ||
| 29 | log "====================x86====================" | 29 | log "====================x86====================" |
| 30 | ./build-android-x86.sh | 30 | ./build-android-x86.sh |
| 31 | 31 | ||
| 32 | +export SHERPA_ONNX_ENABLE_TTS=OFF | ||
| 33 | + | ||
| 32 | mkdir -p apks | 34 | mkdir -p apks |
| 33 | 35 | ||
| 34 | {% for model in model_list %} | 36 | {% for model in model_list %} |
| @@ -29,6 +29,8 @@ log "====================x86-64====================" | @@ -29,6 +29,8 @@ log "====================x86-64====================" | ||
| 29 | log "====================x86====================" | 29 | log "====================x86====================" |
| 30 | ./build-android-x86.sh | 30 | ./build-android-x86.sh |
| 31 | 31 | ||
| 32 | +export SHERPA_ONNX_ENABLE_TTS=ON | ||
| 33 | + | ||
| 32 | mkdir -p apks | 34 | mkdir -p apks |
| 33 | 35 | ||
| 34 | {% for tts_model in tts_model_list %} | 36 | {% for tts_model in tts_model_list %} |
| @@ -29,6 +29,8 @@ log "====================x86-64====================" | @@ -29,6 +29,8 @@ log "====================x86-64====================" | ||
| 29 | log "====================x86====================" | 29 | log "====================x86====================" |
| 30 | ./build-android-x86.sh | 30 | ./build-android-x86.sh |
| 31 | 31 | ||
| 32 | +export SHERPA_ONNX_ENABLE_TTS=ON | ||
| 33 | + | ||
| 32 | mkdir -p apks | 34 | mkdir -p apks |
| 33 | 35 | ||
| 34 | {% for tts_model in tts_model_list %} | 36 | {% for tts_model in tts_model_list %} |
scripts/apk/generate-asr-apk-script.py
0 → 100755
| 1 | +#!/usr/bin/env python3 | ||
| 2 | + | ||
| 3 | +import argparse | ||
| 4 | +from dataclasses import dataclass | ||
| 5 | +from typing import List, Optional | ||
| 6 | + | ||
| 7 | +import jinja2 | ||
| 8 | + | ||
| 9 | + | ||
| 10 | +def get_args(): | ||
| 11 | + parser = argparse.ArgumentParser() | ||
| 12 | + parser.add_argument( | ||
| 13 | + "--total", | ||
| 14 | + type=int, | ||
| 15 | + default=1, | ||
| 16 | + help="Number of runners", | ||
| 17 | + ) | ||
| 18 | + parser.add_argument( | ||
| 19 | + "--index", | ||
| 20 | + type=int, | ||
| 21 | + default=0, | ||
| 22 | + help="Index of the current runner", | ||
| 23 | + ) | ||
| 24 | + return parser.parse_args() | ||
| 25 | + | ||
| 26 | + | ||
| 27 | +@dataclass | ||
| 28 | +class Model: | ||
| 29 | + # We will download | ||
| 30 | + # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2 | ||
| 31 | + model_name: str | ||
| 32 | + | ||
| 33 | + # The type of the model, e.g., 0, 1, 2. It is hardcoded in the kotlin code | ||
| 34 | + idx: int | ||
| 35 | + | ||
| 36 | + # e.g., zh, en, zh_en | ||
| 37 | + lang: str | ||
| 38 | + | ||
| 39 | + # e.g., whisper, paraformer, zipformer | ||
| 40 | + short_name: str = "" | ||
| 41 | + | ||
| 42 | + # cmd is used to remove extra files from the model directory | ||
| 43 | + cmd: str = "" | ||
| 44 | + | ||
| 45 | + | ||
| 46 | +def get_models(): | ||
| 47 | + models = [ | ||
| 48 | + Model( | ||
| 49 | + model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20", | ||
| 50 | + idx=8, | ||
| 51 | + lang="bilingual_zh_en", | ||
| 52 | + short_name="zipformer", | ||
| 53 | + cmd=""" | ||
| 54 | + pushd $model_name | ||
| 55 | + rm -v decoder-epoch-99-avg-1.int8.onnx | ||
| 56 | + rm -v encoder-epoch-99-avg-1.onnx | ||
| 57 | + rm -v joiner-epoch-99-avg-1.onnx | ||
| 58 | + | ||
| 59 | + rm -v *.sh | ||
| 60 | + rm -v .gitattributes | ||
| 61 | + rm -v *state* | ||
| 62 | + rm -rfv test_wavs | ||
| 63 | + | ||
| 64 | + ls -lh | ||
| 65 | + | ||
| 66 | + popd | ||
| 67 | + """, | ||
| 68 | + ), | ||
| 69 | + ] | ||
| 70 | + | ||
| 71 | + return models | ||
| 72 | + | ||
| 73 | + | ||
| 74 | +def main(): | ||
| 75 | + args = get_args() | ||
| 76 | + index = args.index | ||
| 77 | + total = args.total | ||
| 78 | + assert 0 <= index < total, (index, total) | ||
| 79 | + | ||
| 80 | + all_model_list = get_models() | ||
| 81 | + | ||
| 82 | + num_models = len(all_model_list) | ||
| 83 | + | ||
| 84 | + num_per_runner = num_models // total | ||
| 85 | + if num_per_runner <= 0: | ||
| 86 | + raise ValueError(f"num_models: {num_models}, num_runners: {total}") | ||
| 87 | + | ||
| 88 | + start = index * num_per_runner | ||
| 89 | + end = start + num_per_runner | ||
| 90 | + | ||
| 91 | + remaining = num_models - args.total * num_per_runner | ||
| 92 | + | ||
| 93 | + print(f"{index}/{total}: {start}-{end}/{num_models}") | ||
| 94 | + | ||
| 95 | + d = dict() | ||
| 96 | + d["model_list"] = all_model_list[start:end] | ||
| 97 | + if index < remaining: | ||
| 98 | + s = args.total * num_per_runner + index | ||
| 99 | + d["model_list"].append(all_model_list[s]) | ||
| 100 | + print(f"{s}/{num_models}") | ||
| 101 | + | ||
| 102 | + filename_list = [ | ||
| 103 | + "./build-apk-asr.sh", | ||
| 104 | + ] | ||
| 105 | + for filename in filename_list: | ||
| 106 | + environment = jinja2.Environment() | ||
| 107 | + with open(f"{filename}.in") as f: | ||
| 108 | + s = f.read() | ||
| 109 | + template = environment.from_string(s) | ||
| 110 | + | ||
| 111 | + s = template.render(**d) | ||
| 112 | + with open(filename, "w") as f: | ||
| 113 | + print(s, file=f) | ||
| 114 | + | ||
| 115 | + | ||
| 116 | +if __name__ == "__main__": | ||
| 117 | + main() |
| @@ -82,7 +82,7 @@ bool OfflineTtsVitsModelConfig::Validate() const { | @@ -82,7 +82,7 @@ bool OfflineTtsVitsModelConfig::Validate() const { | ||
| 82 | 82 | ||
| 83 | for (const auto &f : required_files) { | 83 | for (const auto &f : required_files) { |
| 84 | if (!FileExists(dict_dir + "/" + f)) { | 84 | if (!FileExists(dict_dir + "/" + f)) { |
| 85 | - SHERPA_ONNX_LOGE("'%s/%s' does not exist.", data_dir.c_str(), | 85 | + SHERPA_ONNX_LOGE("'%s/%s' does not exist.", dict_dir.c_str(), |
| 86 | f.c_str()); | 86 | f.c_str()); |
| 87 | return false; | 87 | return false; |
| 88 | } | 88 | } |
| @@ -12,8 +12,15 @@ endif() | @@ -12,8 +12,15 @@ endif() | ||
| 12 | set(sources | 12 | set(sources |
| 13 | audio-tagging.cc | 13 | audio-tagging.cc |
| 14 | jni.cc | 14 | jni.cc |
| 15 | + keyword-spotter.cc | ||
| 16 | + offline-recognizer.cc | ||
| 15 | offline-stream.cc | 17 | offline-stream.cc |
| 18 | + online-recognizer.cc | ||
| 19 | + online-stream.cc | ||
| 20 | + speaker-embedding-extractor.cc | ||
| 21 | + speaker-embedding-manager.cc | ||
| 16 | spoken-language-identification.cc | 22 | spoken-language-identification.cc |
| 23 | + voice-activity-detector.cc | ||
| 17 | ) | 24 | ) |
| 18 | 25 | ||
| 19 | if(SHERPA_ONNX_ENABLE_TTS) | 26 | if(SHERPA_ONNX_ENABLE_TTS) |
| @@ -6,6 +6,8 @@ | @@ -6,6 +6,8 @@ | ||
| 6 | #define SHERPA_ONNX_JNI_COMMON_H_ | 6 | #define SHERPA_ONNX_JNI_COMMON_H_ |
| 7 | 7 | ||
| 8 | #if __ANDROID_API__ >= 9 | 8 | #if __ANDROID_API__ >= 9 |
| 9 | +#include <strstream> | ||
| 10 | + | ||
| 9 | #include "android/asset_manager.h" | 11 | #include "android/asset_manager.h" |
| 10 | #include "android/asset_manager_jni.h" | 12 | #include "android/asset_manager_jni.h" |
| 11 | #endif | 13 | #endif |
-
请 注册 或 登录 后发表评论