Fangjun Kuang
Committed by GitHub

Run TTS engine service without starting the app. (#553)

1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR) 1 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
2 project(sherpa-onnx) 2 project(sherpa-onnx)
3 3
4 -set(SHERPA_ONNX_VERSION "1.9.8") 4 +set(SHERPA_ONNX_VERSION "1.9.9")
5 5
6 # Disable warning about 6 # Disable warning about
7 # 7 #
1 <?xml version="1.0" encoding="utf-8"?> 1 <?xml version="1.0" encoding="utf-8"?>
2 <manifest xmlns:android="http://schemas.android.com/apk/res/android" 2 <manifest xmlns:android="http://schemas.android.com/apk/res/android"
3 - xmlns:tools="http://schemas.android.com/tools"> 3 + xmlns:tools="http://schemas.android.com/tools"
  4 + package="com.k2fsa.sherpa.onnx.tts.engine">
4 5
5 <application 6 <application
6 android:allowBackup="true" 7 android:allowBackup="true"
@@ -9,6 +9,7 @@ import android.util.Log @@ -9,6 +9,7 @@ import android.util.Log
9 import android.widget.Toast 9 import android.widget.Toast
10 import androidx.activity.ComponentActivity 10 import androidx.activity.ComponentActivity
11 import androidx.activity.compose.setContent 11 import androidx.activity.compose.setContent
  12 +import androidx.activity.viewModels
12 import androidx.compose.foundation.layout.Box 13 import androidx.compose.foundation.layout.Box
13 import androidx.compose.foundation.layout.Column 14 import androidx.compose.foundation.layout.Column
14 import androidx.compose.foundation.layout.Row 15 import androidx.compose.foundation.layout.Row
@@ -43,9 +44,13 @@ import java.lang.NumberFormatException @@ -43,9 +44,13 @@ import java.lang.NumberFormatException
43 const val TAG = "sherpa-onnx-tts-engine" 44 const val TAG = "sherpa-onnx-tts-engine"
44 45
45 class MainActivity : ComponentActivity() { 46 class MainActivity : ComponentActivity() {
  47 + // TODO(fangjun): Save settings in ttsViewModel
  48 + private val ttsViewModel: TtsViewModel by viewModels()
  49 +
  50 + private var mediaPlayer: MediaPlayer? = null
46 override fun onCreate(savedInstanceState: Bundle?) { 51 override fun onCreate(savedInstanceState: Bundle?) {
47 super.onCreate(savedInstanceState) 52 super.onCreate(savedInstanceState)
48 - TtsEngine.createTts(this.application) 53 + TtsEngine.createTts(this)
49 setContent { 54 setContent {
50 SherpaOnnxTtsEngineTheme { 55 SherpaOnnxTtsEngineTheme {
51 // A surface container using the 'background' color from the theme 56 // A surface container using the 'background' color from the theme
@@ -132,11 +137,12 @@ class MainActivity : ComponentActivity() { @@ -132,11 +137,12 @@ class MainActivity : ComponentActivity() {
132 audio.samples.size > 0 && audio.save(filename) 137 audio.samples.size > 0 && audio.save(filename)
133 138
134 if (ok) { 139 if (ok) {
135 - val mediaPlayer = MediaPlayer.create( 140 + stopMediaPlayer()
  141 + mediaPlayer = MediaPlayer.create(
136 applicationContext, 142 applicationContext,
137 Uri.fromFile(File(filename)) 143 Uri.fromFile(File(filename))
138 ) 144 )
139 - mediaPlayer.start() 145 + mediaPlayer?.start()
140 } else { 146 } else {
141 Log.i(TAG, "Failed to generate or save audio") 147 Log.i(TAG, "Failed to generate or save audio")
142 } 148 }
@@ -162,4 +168,15 @@ class MainActivity : ComponentActivity() { @@ -162,4 +168,15 @@ class MainActivity : ComponentActivity() {
162 } 168 }
163 } 169 }
164 } 170 }
  171 +
  172 + override fun onDestroy() {
  173 + stopMediaPlayer()
  174 + super.onDestroy()
  175 + }
  176 +
  177 + private fun stopMediaPlayer() {
  178 + mediaPlayer?.stop()
  179 + mediaPlayer?.release()
  180 + mediaPlayer = null
  181 + }
165 } 182 }
1 package com.k2fsa.sherpa.onnx.tts.engine 1 package com.k2fsa.sherpa.onnx.tts.engine
2 2
3 -import android.app.Application 3 +import android.content.Context
4 import android.content.res.AssetManager 4 import android.content.res.AssetManager
5 import android.util.Log 5 import android.util.Log
6 import androidx.compose.runtime.MutableState 6 import androidx.compose.runtime.MutableState
@@ -21,7 +21,6 @@ object TtsEngine { @@ -21,7 +21,6 @@ object TtsEngine {
21 var lang: String? = null 21 var lang: String? = null
22 22
23 23
24 -  
25 val speedState: MutableState<Float> = mutableStateOf(1.0F) 24 val speedState: MutableState<Float> = mutableStateOf(1.0F)
26 val speakerIdState: MutableState<Int> = mutableStateOf(0) 25 val speakerIdState: MutableState<Int> = mutableStateOf(0)
27 26
@@ -44,19 +43,7 @@ object TtsEngine { @@ -44,19 +43,7 @@ object TtsEngine {
44 private var dataDir: String? = null 43 private var dataDir: String? = null
45 private var assets: AssetManager? = null 44 private var assets: AssetManager? = null
46 45
47 - private var application: Application? = null  
48 -  
49 - fun createTts(application: Application) {  
50 - Log.i(TAG, "Init Next-gen Kaldi TTS")  
51 - if (tts == null) {  
52 - this.application = application  
53 - initTts()  
54 - }  
55 - }  
56 -  
57 - private fun initTts() {  
58 - assets = application?.assets  
59 - 46 + init {
60 // The purpose of such a design is to make the CI test easier 47 // The purpose of such a design is to make the CI test easier
61 // Please see 48 // Please see
62 // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py 49 // https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
@@ -89,9 +76,21 @@ object TtsEngine { @@ -89,9 +76,21 @@ object TtsEngine {
89 // ruleFsts = "vits-zh-aishell3/rule.fst" 76 // ruleFsts = "vits-zh-aishell3/rule.fst"
90 // lexicon = "lexicon.txt" 77 // lexicon = "lexicon.txt"
91 // lang = "zho" 78 // lang = "zho"
  79 + }
  80 +
  81 +
  82 + fun createTts(context: Context) {
  83 + Log.i(TAG, "Init Next-gen Kaldi TTS")
  84 + if (tts == null) {
  85 + initTts(context)
  86 + }
  87 + }
  88 +
  89 + private fun initTts(context: Context) {
  90 + assets = context.assets
92 91
93 if (dataDir != null) { 92 if (dataDir != null) {
94 - val newDir = copyDataDir(modelDir!!) 93 + val newDir = copyDataDir(context, modelDir!!)
95 modelDir = newDir + "/" + modelDir 94 modelDir = newDir + "/" + modelDir
96 dataDir = newDir + "/" + dataDir 95 dataDir = newDir + "/" + dataDir
97 assets = null 96 assets = null
@@ -107,28 +106,28 @@ object TtsEngine { @@ -107,28 +106,28 @@ object TtsEngine {
107 } 106 }
108 107
109 108
110 - private fun copyDataDir(dataDir: String): String { 109 + private fun copyDataDir(context: Context, dataDir: String): String {
111 println("data dir is $dataDir") 110 println("data dir is $dataDir")
112 - copyAssets(dataDir) 111 + copyAssets(context, dataDir)
113 112
114 - val newDataDir = application!!.getExternalFilesDir(null)!!.absolutePath 113 + val newDataDir = context.getExternalFilesDir(null)!!.absolutePath
115 println("newDataDir: $newDataDir") 114 println("newDataDir: $newDataDir")
116 return newDataDir 115 return newDataDir
117 } 116 }
118 117
119 - private fun copyAssets(path: String) { 118 + private fun copyAssets(context: Context, path: String) {
120 val assets: Array<String>? 119 val assets: Array<String>?
121 try { 120 try {
122 - assets = application!!.assets.list(path) 121 + assets = context.assets.list(path)
123 if (assets!!.isEmpty()) { 122 if (assets!!.isEmpty()) {
124 - copyFile(path) 123 + copyFile(context, path)
125 } else { 124 } else {
126 - val fullPath = "${application!!.getExternalFilesDir(null)}/$path" 125 + val fullPath = "${context.getExternalFilesDir(null)}/$path"
127 val dir = File(fullPath) 126 val dir = File(fullPath)
128 dir.mkdirs() 127 dir.mkdirs()
129 for (asset in assets.iterator()) { 128 for (asset in assets.iterator()) {
130 val p: String = if (path == "") "" else path + "/" 129 val p: String = if (path == "") "" else path + "/"
131 - copyAssets(p + asset) 130 + copyAssets(context, p + asset)
132 } 131 }
133 } 132 }
134 } catch (ex: IOException) { 133 } catch (ex: IOException) {
@@ -136,10 +135,10 @@ object TtsEngine { @@ -136,10 +135,10 @@ object TtsEngine {
136 } 135 }
137 } 136 }
138 137
139 - private fun copyFile(filename: String) { 138 + private fun copyFile(context: Context, filename: String) {
140 try { 139 try {
141 - val istream = application!!.assets.open(filename)  
142 - val newFilename = application!!.getExternalFilesDir(null).toString() + "/" + filename 140 + val istream = context.assets.open(filename)
  141 + val newFilename = context.getExternalFilesDir(null).toString() + "/" + filename
143 val ostream = FileOutputStream(newFilename) 142 val ostream = FileOutputStream(newFilename)
144 // Log.i(TAG, "Copying $filename to $newFilename") 143 // Log.i(TAG, "Copying $filename to $newFilename")
145 val buffer = ByteArray(1024) 144 val buffer = ByteArray(1024)
@@ -56,12 +56,18 @@ Failed to get default language from engine com.k2fsa.sherpa.chapter5 @@ -56,12 +56,18 @@ Failed to get default language from engine com.k2fsa.sherpa.chapter5
56 56
57 class TtsService : TextToSpeechService() { 57 class TtsService : TextToSpeechService() {
58 override fun onCreate() { 58 override fun onCreate() {
  59 + Log.i(TAG, "onCreate tts service")
59 super.onCreate() 60 super.onCreate()
60 61
61 // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68 62 // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68
62 onLoadLanguage(TtsEngine.lang, "", "") 63 onLoadLanguage(TtsEngine.lang, "", "")
63 } 64 }
64 65
    /**
     * Logs that the TTS service is being destroyed, then defers to the
     * framework implementation.
     */
    override fun onDestroy() {
        Log.i(TAG, "onDestroy tts service")
        super.onDestroy()
    }
  70 +
65 // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onislanguageavailable 71 // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onislanguageavailable
66 override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int { 72 override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int {
67 val lang = _lang ?: "" 73 val lang = _lang ?: ""
@@ -79,12 +85,15 @@ class TtsService : TextToSpeechService() { @@ -79,12 +85,15 @@ class TtsService : TextToSpeechService() {
79 85
80 // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onLoadLanguage(kotlin.String,%20kotlin.String,%20kotlin.String) 86 // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeechService#onLoadLanguage(kotlin.String,%20kotlin.String,%20kotlin.String)
81 override fun onLoadLanguage(_lang: String?, _country: String?, _variant: String?): Int { 87 override fun onLoadLanguage(_lang: String?, _country: String?, _variant: String?): Int {
  88 + Log.i(TAG, "onLoadLanguage: $_lang, $_country")
82 val lang = _lang ?: "" 89 val lang = _lang ?: ""
83 90
84 return if (lang == TtsEngine.lang) { 91 return if (lang == TtsEngine.lang) {
  92 + Log.i(TAG, "creating tts, lang :$lang")
85 TtsEngine.createTts(application) 93 TtsEngine.createTts(application)
86 TextToSpeech.LANG_AVAILABLE 94 TextToSpeech.LANG_AVAILABLE
87 } else { 95 } else {
  96 + Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}")
88 TextToSpeech.LANG_NOT_SUPPORTED 97 TextToSpeech.LANG_NOT_SUPPORTED
89 } 98 }
90 } 99 }
@@ -118,7 +127,7 @@ class TtsService : TextToSpeechService() { @@ -118,7 +127,7 @@ class TtsService : TextToSpeechService() {
118 return 127 return
119 } 128 }
120 129
121 - val ttsCallback = {floatSamples: FloatArray -> 130 + val ttsCallback = { floatSamples: FloatArray ->
122 // convert FloatArray to ByteArray 131 // convert FloatArray to ByteArray
123 val samples = floatArrayToByteArray(floatSamples) 132 val samples = floatArrayToByteArray(floatSamples)
124 val maxBufferSize: Int = callback.maxBufferSize 133 val maxBufferSize: Int = callback.maxBufferSize
@@ -136,7 +145,7 @@ class TtsService : TextToSpeechService() { @@ -136,7 +145,7 @@ class TtsService : TextToSpeechService() {
136 text = text, 145 text = text,
137 sid = TtsEngine.speakerId, 146 sid = TtsEngine.speakerId,
138 speed = TtsEngine.speed, 147 speed = TtsEngine.speed,
139 - callback=ttsCallback, 148 + callback = ttsCallback,
140 ) 149 )
141 150
142 callback.done() 151 callback.done()
  1 +package com.k2fsa.sherpa.onnx.tts.engine
  2 +
  3 +import android.app.Application
  4 +import android.os.FileUtils.ProgressListener
  5 +import android.speech.tts.TextToSpeech
  6 +import android.speech.tts.TextToSpeech.OnInitListener
  7 +import android.speech.tts.UtteranceProgressListener
  8 +import android.util.Log
  9 +import androidx.lifecycle.ViewModel
  10 +import java.util.Locale
  11 +
/**
 * Application subclass that publishes itself through [TtsApp.instance].
 *
 * [instance] is assigned in [onCreate], which gives code that has no
 * `Context` of its own a way to reach the application object.
 */
class TtsApp : Application() {

    override fun onCreate() {
        super.onCreate()
        // Publish the application object for later lookups via the companion.
        instance = this
    }

    companion object {
        // Assigned in onCreate(); reading it before that throws
        // UninitializedPropertyAccessException.
        lateinit var instance: TtsApp
    }
}
  23 +
/**
 * ViewModel that owns a platform [TextToSpeech] client bound explicitly to
 * the `com.k2fsa.sherpa.onnx.tts.engine` engine package.
 *
 * The client is created when the ViewModel is constructed and is shut down in
 * [onCleared]. The engine language is applied only after the client reports
 * successful initialization, because a [TextToSpeech] instance is not usable
 * until its [OnInitListener] has been notified.
 */
class TtsViewModel : ViewModel() {

    // https://developer.android.com/reference/kotlin/android/speech/tts/TextToSpeech.OnInitListener
    private val onInitListener = object : OnInitListener {
        override fun onInit(status: Int) {
            when (status) {
                TextToSpeech.SUCCESS -> {
                    Log.i(TAG, "Init tts succeded")
                    // The client is bound to the engine now, so language
                    // selection can actually take effect.
                    applyEngineLanguage()
                }

                TextToSpeech.ERROR -> Log.i(TAG, "Init tts failed")
                else -> Log.i(TAG, "Unknown status $status")
            }
        }
    }

    // https://developer.android.com/reference/kotlin/android/speech/tts/UtteranceProgressListener
    private val utteranceProgressListener = object : UtteranceProgressListener() {
        override fun onStart(utteranceId: String?) {
            Log.i(TAG, "onStart: $utteranceId")
        }

        override fun onStop(utteranceId: String?, interrupted: Boolean) {
            Log.i(TAG, "onStop: $utteranceId, $interrupted")
            super.onStop(utteranceId, interrupted)
        }

        override fun onError(utteranceId: String?, errorCode: Int) {
            Log.i(TAG, "onError: $utteranceId, $errorCode")
            super.onError(utteranceId, errorCode)
        }

        override fun onDone(utteranceId: String?) {
            Log.i(TAG, "onDone: $utteranceId")
        }

        @Deprecated("Deprecated in Java")
        override fun onError(utteranceId: String?) {
            Log.i(TAG, "onError: $utteranceId")
        }
    }

    // Declared after the listeners on purpose: the constructor receives
    // onInitListener, which therefore has to be initialized first.
    val tts = TextToSpeech(TtsApp.instance, onInitListener, "com.k2fsa.sherpa.onnx.tts.engine")

    init {
        // Registering the progress listener does not need a bound engine,
        // so it is done right away.
        tts.setOnUtteranceProgressListener(utteranceProgressListener)
    }

    /**
     * Selects [TtsEngine.lang] on the client.
     *
     * Called from the init callback once the client is ready. A missing
     * language is logged and skipped rather than crashing the ViewModel, and
     * an unsuccessful [TextToSpeech.setLanguage] result is logged as well.
     */
    private fun applyEngineLanguage() {
        // Copy to a local: TtsEngine.lang is a mutable property and cannot be smart-cast.
        val lang = TtsEngine.lang
        if (lang == null) {
            Log.w(TAG, "TtsEngine.lang is not set; skipping setLanguage")
            return
        }

        val result = tts.setLanguage(Locale(lang))
        if (result == TextToSpeech.LANG_MISSING_DATA || result == TextToSpeech.LANG_NOT_SUPPORTED) {
            Log.w(TAG, "setLanguage($lang) failed with result $result")
        }
    }

    override fun onCleared() {
        super.onCleared()
        // Release the engine binding held by the client.
        tts.shutdown()
    }
}