Fangjun Kuang
Committed by GitHub

Use aar in Android Java demo. (#1616)

# Introduction
Please run the following commands to download model files before you run this Android demo:
```bash
# Assume we are inside
# /Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo
cd app/src/main/assets/
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx ./
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ./
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx ./
mv sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ./
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/*
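mv encoder-epoch-99-avg-1.int8.onnx ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
mv decoder-epoch-99-avg-1.onnx ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
mv joiner-epoch-99-avg-1.int8.onnx ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/
mv tokens.txt ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/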
```
You should have the following directory structure:
```
(py38) fangjuns-MacBook-Pro:assets fangjun$ pwd
/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxJavaDemo/app/src/main/assets
(py38) fangjuns-MacBook-Pro:assets fangjun$ tree .
.
└── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
    ├── decoder-epoch-99-avg-1.onnx
    ├── encoder-epoch-99-avg-1.int8.onnx
    ├── joiner-epoch-99-avg-1.int8.onnx
    └── tokens.txt
1 directory, 4 files
```
Remember to remove unused files to reduce the file size of the final APK.
... ...
... ... @@ -8,7 +8,7 @@ android {
defaultConfig {
applicationId "com.k2fsa.sherpa.onnx"
minSdk 28
targetSdk 32
targetSdk 34
versionCode 1
versionName "1.0"
... ... @@ -25,17 +25,14 @@ android {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
sourceSets.main{
jniLibs.srcDirs = ['jniLibs']
}
}
dependencies {
implementation 'androidx.appcompat:appcompat:1.3.1'
implementation 'com.google.android.material:material:1.3.0'
implementation 'androidx.constraintlayout:constraintlayout:1.1.3'
implementation 'pub.devrel:easypermissions:3.0.0'
implementation project(path: ':sherpa')
implementation 'androidx.core:core-ktx:1.7.0'
// implementation files('/Users/fangjun/open-source/sherpa-onnx/android/SherpaOnnxAar/sherpa_onnx/build/outputs/aar/sherpa_onnx-release.aar')
implementation 'com.github.k2-fsa:sherpa-onnx:master-SNAPSHOT'
}
... ...
// Copyright 2022-2023 by zhaoming
// Copyright 2024 Xiaomi Corporation
package com.k2fsa.sherpa.onnx;
import android.content.res.AssetManager;
/**
 * Java-side handle to a streaming recognizer implemented in the
 * {@code sherpa-onnx-jni} native library.
 *
 * <p>Each instance stores an opaque {@code long} returned by the native
 * constructor functions and forwards every call to the matching native method.
 * Call {@link #release()} when the recognizer is no longer needed; the
 * finalizer is only a fallback.
 */
public class OnlineRecognizer {
    static {
        System.loadLibrary("sherpa-onnx-jni");
    }

    /** Opaque native handle; {@code 0} means "no native object". */
    private long ptr = 0;

    /**
     * Creates a recognizer through the native {@code newFromFile} entry point.
     *
     * @param config recognizer configuration handed to native code
     */
    public OnlineRecognizer(OnlineRecognizerConfig config) {
        ptr = newFromFile(config);
    }

    /**
     * Creates a recognizer through the native {@code newFromAsset} entry point.
     *
     * @param assetManager asset manager handed to native code
     * @param config recognizer configuration handed to native code
     */
    public OnlineRecognizer(AssetManager assetManager, OnlineRecognizerConfig config) {
        ptr = newFromAsset(assetManager, config);
    }

    /**
     * Runs the native {@code decode} step for the given stream.
     *
     * @param s stream to decode
     * @throws IllegalStateException if this recognizer has no native handle
     */
    public void decode(OnlineStream s) {
        decode(handle(), s.getPtr());
    }

    /**
     * Returns the result of the native {@code isReady} query for the stream.
     *
     * @param s stream to query
     * @return value reported by native code
     * @throws IllegalStateException if this recognizer has no native handle
     */
    public boolean isReady(OnlineStream s) {
        return isReady(handle(), s.getPtr());
    }

    /**
     * Returns the result of the native {@code isEndpoint} query for the stream.
     *
     * @param s stream to query
     * @return value reported by native code
     * @throws IllegalStateException if this recognizer has no native handle
     */
    public boolean isEndpoint(OnlineStream s) {
        return isEndpoint(handle(), s.getPtr());
    }

    /**
     * Invokes the native {@code reset} for the given stream.
     *
     * @param s stream to reset
     * @throws IllegalStateException if this recognizer has no native handle
     */
    public void reset(OnlineStream s) {
        reset(handle(), s.getPtr());
    }

    /**
     * Creates a stream with an empty hotwords string.
     *
     * @return a new stream wrapping the handle returned by native code
     * @throws IllegalStateException if this recognizer has no native handle
     */
    public OnlineStream createStream() {
        return createStream("");
    }

    /**
     * Creates a stream, passing {@code hotwords} to native code.
     *
     * @param hotwords hotwords string for the stream; {@code null} is treated
     *     as the empty string so native code never receives a null reference
     * @return a new stream wrapping the handle returned by native code
     * @throws IllegalStateException if this recognizer has no native handle
     */
    public OnlineStream createStream(String hotwords) {
        String effectiveHotwords = (hotwords == null) ? "" : hotwords;
        long streamPtr = createStream(handle(), effectiveHotwords);
        return new OnlineStream(streamPtr);
    }

    /** Fallback cleanup; prefer calling {@link #release()} explicitly. */
    @Override
    protected void finalize() throws Throwable {
        try {
            release();
        } finally {
            // Keep the superclass finalizer chain intact even if release() throws.
            super.finalize();
        }
    }

    /**
     * Hands the native handle to the native {@code delete} and clears it.
     * You'd better call it manually once the recognizer is not used anymore.
     * Calling it more than once is a no-op.
     */
    public void release() {
        if (this.ptr == 0) {
            return;
        }
        delete(this.ptr);
        this.ptr = 0;
    }

    /**
     * Fetches the current result for the stream.
     *
     * <p>The native call returns an array whose elements are read as
     * {@code [0]} text, {@code [1]} tokens and {@code [2]} timestamps.
     *
     * @param s stream whose result is requested
     * @return the result assembled from the native array
     * @throws IllegalStateException if this recognizer has no native handle
     */
    public OnlineRecognizerResult getResult(OnlineStream s) {
        Object[] arr = getResult(handle(), s.getPtr());
        String text = (String) arr[0];
        String[] tokens = (String[]) arr[1];
        float[] timestamps = (float[]) arr[2];
        return new OnlineRecognizerResult(text, tokens, timestamps);
    }

    /** Returns the native handle, refusing to pass a zero handle to native code. */
    private long handle() {
        if (ptr == 0) {
            throw new IllegalStateException(
                    "OnlineRecognizer has no native handle (already released?)");
        }
        return ptr;
    }

    private native void delete(long ptr);

    private native long newFromFile(OnlineRecognizerConfig config);

    private native long newFromAsset(AssetManager assetManager, OnlineRecognizerConfig config);

    private native long createStream(long ptr, String hotwords);

    private native void reset(long ptr, long streamPtr);

    private native void decode(long ptr, long streamPtr);

    private native boolean isEndpoint(long ptr, long streamPtr);

    private native boolean isReady(long ptr, long streamPtr);

    private native Object[] getResult(long ptr, long streamPtr);
}
\ No newline at end of file
package com.k2fsa.sherpa.onnx.service;
import android.Manifest;
import android.annotation.SuppressLint;
import android.app.Notification;
import android.app.NotificationChannel;
import android.app.NotificationManager;
... ... @@ -67,6 +68,16 @@ public class SpeechSherpaRecognitionService extends Service {
appViewModel = Application.getInstance().getViewModel();
int numBytes = AudioRecord.getMinBufferSize(sampleRateInHz, channelConfig, audioFormat);
if (ActivityCompat.checkSelfPermission(this, Manifest.permission.RECORD_AUDIO) != PackageManager.PERMISSION_GRANTED) {
// TODO: Consider calling
// ActivityCompat#requestPermissions
// here to request the missing permissions, and then overriding
// public void onRequestPermissionsResult(int requestCode, String[] permissions,
// int[] grantResults)
// to handle the case where the user grants the permission. See the documentation
// for ActivityCompat#requestPermissions for more details.
return;
}
audioRecord = new AudioRecord(
audioSource,
sampleRateInHz,
... ... @@ -81,22 +92,21 @@ public class SpeechSherpaRecognitionService extends Service {
private void initializeSherpa() {
Log.d("Current Directory", System.getProperty("user.dir"));
String modelDir = "sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23";
String modelDir = "sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20";
initializeSherpaDir(modelDir, modelDir);
OnlineTransducerModelConfig onlineTransducerModelConfig = OnlineTransducerModelConfig.builder()
.setEncoder(modelDir + "/encoder-epoch-99-avg-1.int8.onnx")
.setDecoder(modelDir + "/decoder-epoch-99-avg-1.onnx")
.setJoiner(modelDir + "/joiner-epoch-99-avg-1.int8.onnx")
.build();
OnlineModelConfig onlineModelConfig = OnlineModelConfig.builder()
.setTransducer(onlineTransducerModelConfig)
.setTokens(modelDir + "/tokens.txt")
.setModelType("zipformer")
.build();
OnlineRecognizerConfig config = OnlineRecognizerConfig.builder()
.setOnlineModelConfig(onlineModelConfig)
.build();
OnlineTransducerModelConfig onlineTransducerModelConfig = new OnlineTransducerModelConfig();
onlineTransducerModelConfig.setEncoder(modelDir + "/encoder-epoch-99-avg-1.int8.onnx");
onlineTransducerModelConfig.setDecoder(modelDir + "/decoder-epoch-99-avg-1.onnx");
onlineTransducerModelConfig.setJoiner(modelDir + "/joiner-epoch-99-avg-1.int8.onnx");
OnlineModelConfig onlineModelConfig = new OnlineModelConfig();
onlineModelConfig.setTransducer(onlineTransducerModelConfig);
onlineModelConfig.setTokens(modelDir + "/tokens.txt");
onlineModelConfig.setModelType("zipformer");
onlineModelConfig.setDebug(true);
OnlineRecognizerConfig config = new OnlineRecognizerConfig();
config.setModelConfig(onlineModelConfig);
recognizer = new OnlineRecognizer(getAssets(), config);
audioRecord.startRecording();
... ... @@ -110,7 +120,7 @@ public class SpeechSherpaRecognitionService extends Service {
}
private void processSamples() {
OnlineStream stream = recognizer.createStream();
OnlineStream stream = recognizer.createStream("");
double interval = 0.1;
int bufferSize = (int) (interval * sampleRateInHz);
short[] buffer = new short[bufferSize];
... ... @@ -182,6 +192,7 @@ public class SpeechSherpaRecognitionService extends Service {
}
@SuppressLint("ForegroundServiceType")
private void startForegroundService() {
String channelId = createNotificationChannel();
... ...
... ... @@ -10,8 +10,8 @@ dependencyResolutionManagement {
repositories {
google()
mavenCentral()
maven { url 'https://jitpack.io' }
}
}
rootProject.name = "SherpaOnnxJavaDemo"
include ':app'
include ':sherpa'
... ...
// Applies the Android library plugin to this module.
plugins {
id 'com.android.library'
}
// Android build settings for the library module (namespace com.k2fsa.sherpa).
android {
namespace 'com.k2fsa.sherpa'
compileSdk 34
defaultConfig {
minSdk 26
// NOTE(review): targetSdk 27 is far below compileSdk 34 — confirm the gap is intentional.
targetSdk 27
versionCode 1
versionName "1.0"
// NOTE(review): names dimension 'base' with fallback 'feature1'; no flavors are
// declared in this file — confirm a dependency still requires it.
missingDimensionStrategy 'base', 'feature1'
testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner"
}
buildTypes {
release {
// Minification is turned off for the release build type.
minifyEnabled false
proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro'
}
}
// Source and bytecode level are both Java 8.
compileOptions {
sourceCompatibility JavaVersion.VERSION_1_8
targetCompatibility JavaVersion.VERSION_1_8
}
}
// Library and test dependencies, declared with explicit coordinates.
dependencies {
// The commented-out entries below are earlier declarations (some using `libs.*`
// accessors); the active lines that follow use explicit versions instead.
// implementation "androidx.appcompat"
// implementation libs.material
// testImplementation libs.junit
// androidTestImplementation libs.androidx.test.ext.junit
// androidTestImplementation libs.espresso.core
implementation 'androidx.appcompat:appcompat:1.6.1'
implementation 'com.google.android.material:material:1.9.0'
testImplementation 'junit:junit:4.13.2'
androidTestImplementation 'androidx.test.ext:junit:1.1.5'
androidTestImplementation 'androidx.test.espresso:espresso-core:3.5.1'
}
\ No newline at end of file
# Add project specific ProGuard rules here.
# You can control the set of applied configuration files using the
# proguardFiles setting in build.gradle.
#
# For more details, see
# http://developer.android.com/guide/developing/tools/proguard.html
# If your project uses WebView with JS, uncomment the following
# and specify the fully qualified class name to the JavaScript interface
# class:
#-keepclassmembers class fqcn.of.javascript.interface.for.webview {
# public *;
#}
# Uncomment this to preserve the line number information for
# debugging stack traces.
#-keepattributes SourceFile,LineNumberTable
# If you keep the line number information, uncomment this to
# hide the original source file name.
#-renamesourcefileattribute SourceFile
\ No newline at end of file
<?xml version="1.0" encoding="utf-8"?>
<manifest xmlns:android="http://schemas.android.com/apk/res/android">
<application
android:allowBackup="true"
android:label="@string/app_name"
android:supportsRtl="true" />
</manifest>
\ No newline at end of file
../../../../../../../sherpa-onnx/sherpa-onnx/java-api/src/com
\ No newline at end of file
<resources>
<string name="app_name">sherpa</string>
</resources>
\ No newline at end of file
... ... @@ -15,8 +15,8 @@ data class AudioTaggingModelConfig(
)
data class AudioTaggingConfig(
var model: AudioTaggingModelConfig,
var labels: String,
var model: AudioTaggingModelConfig = AudioTaggingModelConfig(),
var labels: String = "",
var topK: Int = 5,
)
... ...
... ... @@ -5,7 +5,7 @@ import android.content.res.AssetManager
data class KeywordSpotterConfig(
var featConfig: FeatureConfig = FeatureConfig(),
var modelConfig: OnlineModelConfig,
var modelConfig: OnlineModelConfig = OnlineModelConfig(),
var maxActivePaths: Int = 4,
var keywordsFile: String = "keywords.txt",
var keywordsScore: Float = 1.5f,
... ...
... ... @@ -3,7 +3,7 @@ package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class OfflinePunctuationModelConfig(
var ctTransformer: String,
var ctTransformer: String = "",
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
... ...
... ... @@ -58,14 +58,14 @@ data class OfflineModelConfig(
var debug: Boolean = false,
var provider: String = "cpu",
var modelType: String = "",
var tokens: String,
var tokens: String = "",
var modelingUnit: String = "",
var bpeVocab: String = "",
)
data class OfflineRecognizerConfig(
var featConfig: FeatureConfig = FeatureConfig(),
var modelConfig: OfflineModelConfig,
var modelConfig: OfflineModelConfig = OfflineModelConfig(),
// var lmConfig: OfflineLMConfig(), // TODO(fangjun): enable it
var decodingMethod: String = "greedy_search",
var maxActivePaths: Int = 4,
... ...
... ... @@ -3,11 +3,11 @@ package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class OfflineSpeakerSegmentationPyannoteModelConfig(
var model: String,
var model: String = "",
)
data class OfflineSpeakerSegmentationModelConfig(
var pyannote: OfflineSpeakerSegmentationPyannoteModelConfig,
var pyannote: OfflineSpeakerSegmentationPyannoteModelConfig = OfflineSpeakerSegmentationPyannoteModelConfig(),
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
... ... @@ -19,9 +19,9 @@ data class FastClusteringConfig(
)
data class OfflineSpeakerDiarizationConfig(
var segmentation: OfflineSpeakerSegmentationModelConfig,
var embedding: SpeakerEmbeddingExtractorConfig,
var clustering: FastClusteringConfig,
var segmentation: OfflineSpeakerSegmentationModelConfig = OfflineSpeakerSegmentationModelConfig(),
var embedding: SpeakerEmbeddingExtractorConfig = SpeakerEmbeddingExtractorConfig(),
var clustering: FastClusteringConfig = FastClusteringConfig(),
var minDurationOn: Float = 0.2f,
var minDurationOff: Float = 0.5f,
)
... ...
... ... @@ -38,7 +38,7 @@ data class OnlineModelConfig(
var paraformer: OnlineParaformerModelConfig = OnlineParaformerModelConfig(),
var zipformer2Ctc: OnlineZipformer2CtcModelConfig = OnlineZipformer2CtcModelConfig(),
var neMoCtc: OnlineNeMoCtcModelConfig = OnlineNeMoCtcModelConfig(),
var tokens: String,
var tokens: String = "",
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
... ... @@ -60,7 +60,7 @@ data class OnlineCtcFstDecoderConfig(
data class OnlineRecognizerConfig(
var featConfig: FeatureConfig = FeatureConfig(),
var modelConfig: OnlineModelConfig,
var modelConfig: OnlineModelConfig = OnlineModelConfig(),
var lmConfig: OnlineLMConfig = OnlineLMConfig(),
var ctcFstDecoderConfig: OnlineCtcFstDecoderConfig = OnlineCtcFstDecoderConfig(),
var endpointConfig: EndpointConfig = EndpointConfig(),
... ...
package com.k2fsa.sherpa.onnx
data class SpeakerEmbeddingExtractorConfig(
val model: String,
val model: String = "",
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
... ...
... ... @@ -3,13 +3,13 @@ package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class SpokenLanguageIdentificationWhisperConfig(
var encoder: String,
var decoder: String,
var encoder: String = "",
var decoder: String = "",
var tailPaddings: Int = -1,
)
data class SpokenLanguageIdentificationConfig(
var whisper: SpokenLanguageIdentificationWhisperConfig,
var whisper: SpokenLanguageIdentificationWhisperConfig = SpokenLanguageIdentificationWhisperConfig(),
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
... ...
... ... @@ -4,9 +4,9 @@ package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class OfflineTtsVitsModelConfig(
var model: String,
var model: String = "",
var lexicon: String = "",
var tokens: String,
var tokens: String = "",
var dataDir: String = "",
var dictDir: String = "",
var noiseScale: Float = 0.667f,
... ... @@ -15,14 +15,14 @@ data class OfflineTtsVitsModelConfig(
)
data class OfflineTtsModelConfig(
var vits: OfflineTtsVitsModelConfig,
var vits: OfflineTtsVitsModelConfig = OfflineTtsVitsModelConfig(),
var numThreads: Int = 1,
var debug: Boolean = false,
var provider: String = "cpu",
)
data class OfflineTtsConfig(
var model: OfflineTtsModelConfig,
var model: OfflineTtsModelConfig = OfflineTtsModelConfig(),
var ruleFsts: String = "",
var ruleFars: String = "",
var maxNumSentences: Int = 1,
... ...
... ... @@ -4,7 +4,7 @@ package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
data class SileroVadModelConfig(
var model: String,
var model: String = "",
var threshold: Float = 0.5F,
var minSilenceDuration: Float = 0.25F,
var minSpeechDuration: Float = 0.25F,
... ... @@ -13,7 +13,7 @@ data class SileroVadModelConfig(
)
data class VadModelConfig(
var sileroVadModelConfig: SileroVadModelConfig,
var sileroVadModelConfig: SileroVadModelConfig = SileroVadModelConfig(),
var sampleRate: Int = 16000,
var numThreads: Int = 1,
var provider: String = "cpu",
... ... @@ -112,5 +112,5 @@ fun getVadModelConfig(type: Int): VadModelConfig? {
)
}
}
return null;
return null
}
... ...