Fangjun Kuang
Committed by GitHub

Support homophone replacer in Android asr demo. (#2210)

@@ -12,6 +12,9 @@ import android.widget.Button @@ -12,6 +12,9 @@ import android.widget.Button
12 import android.widget.TextView 12 import android.widget.TextView
13 import androidx.appcompat.app.AppCompatActivity 13 import androidx.appcompat.app.AppCompatActivity
14 import androidx.core.app.ActivityCompat 14 import androidx.core.app.ActivityCompat
  15 +import java.io.File
  16 +import java.io.FileOutputStream
  17 +import java.io.IOException
15 import kotlin.concurrent.thread 18 import kotlin.concurrent.thread
16 19
17 private const val TAG = "sherpa-onnx" 20 private const val TAG = "sherpa-onnx"
@@ -199,8 +202,22 @@ class MainActivity : AppCompatActivity() { @@ -199,8 +202,22 @@ class MainActivity : AppCompatActivity() {
199 var ruleFsts : String? 202 var ruleFsts : String?
200 ruleFsts = null 203 ruleFsts = null
201 204
  205 + val useHr = false
  206 + val hr = HomophoneReplacerConfig(
  207 + // Used only when useHr is true
  208 + // Please download the following 3 files from
  209 + // https://github.com/k2-fsa/sherpa-onnx/releases/tag/hr-files
  210 + //
  211 + // dict and lexicon.txt can be shared by different apps
  212 + //
  213 + // replace.fst is specific for an app
  214 + dictDir = "dict",
  215 + lexicon = "lexicon.txt",
  216 + ruleFsts = "replace.fst",
  217 + )
  218 +
202 Log.i(TAG, "Select model type $type") 219 Log.i(TAG, "Select model type $type")
203 - val config = OnlineRecognizerConfig( 220 + var config = OnlineRecognizerConfig(
204 featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80), 221 featConfig = getFeatureConfig(sampleRate = sampleRateInHz, featureDim = 80),
205 modelConfig = getModelConfig(type = type)!!, 222 modelConfig = getModelConfig(type = type)!!,
206 // lmConfig = getOnlineLMConfig(type = type), 223 // lmConfig = getOnlineLMConfig(type = type),
@@ -212,9 +229,66 @@ class MainActivity : AppCompatActivity() { @@ -212,9 +229,66 @@ class MainActivity : AppCompatActivity() {
212 config.ruleFsts = ruleFsts 229 config.ruleFsts = ruleFsts
213 } 230 }
214 231
  232 + if (useHr) {
  233 + if (hr.dictDir.isNotEmpty() && hr.dictDir.first() != '/') {
  234 + // We need to copy it from the assets directory to some path
  235 + val newDir = copyDataDir(hr.dictDir)
  236 + hr.dictDir = "$newDir/${hr.dictDir}"
  237 + }
  238 + config.hr = hr
  239 + }
  240 +
215 recognizer = OnlineRecognizer( 241 recognizer = OnlineRecognizer(
216 assetManager = application.assets, 242 assetManager = application.assets,
217 config = config, 243 config = config,
218 ) 244 )
219 } 245 }
  246 + private fun copyDataDir(dataDir: String): String {
  247 + Log.i(TAG, "data dir is $dataDir")
  248 + copyAssets(dataDir)
  249 +
  250 + val newDataDir = application.getExternalFilesDir(null)!!.absolutePath
  251 + Log.i(TAG, "newDataDir: $newDataDir")
  252 + return newDataDir
  253 + }
  254 +
  255 + private fun copyAssets(path: String) {
  256 + val assets: Array<String>?
  257 + try {
  258 + assets = application.assets.list(path)
  259 + if (assets!!.isEmpty()) {
  260 + copyFile(path)
  261 + } else {
  262 + val fullPath = "${application.getExternalFilesDir(null)}/$path"
  263 + val dir = File(fullPath)
  264 + dir.mkdirs()
  265 + for (asset in assets.iterator()) {
  266 + val p: String = if (path == "") "" else path + "/"
  267 + copyAssets(p + asset)
  268 + }
  269 + }
  270 + } catch (ex: IOException) {
  271 + Log.e(TAG, "Failed to copy $path. $ex")
  272 + }
  273 + }
  274 +
  275 + private fun copyFile(filename: String) {
  276 + try {
  277 + val istream = application.assets.open(filename)
  278 + val newFilename = application.getExternalFilesDir(null).toString() + "/" + filename
  279 + val ostream = FileOutputStream(newFilename)
  280 + // Log.i(TAG, "Copying $filename to $newFilename")
  281 + val buffer = ByteArray(1024)
  282 + var read = 0
  283 + while (read != -1) {
  284 + ostream.write(buffer, 0, read)
  285 + read = istream.read(buffer)
  286 + }
  287 + istream.close()
  288 + ostream.flush()
  289 + ostream.close()
  290 + } catch (ex: Exception) {
  291 + Log.e(TAG, "Failed to copy $filename, $ex")
  292 + }
  293 + }
220 } 294 }
@@ -296,7 +296,7 @@ HomophoneReplacer::HomophoneReplacer(Manager *mgr, @@ -296,7 +296,7 @@ HomophoneReplacer::HomophoneReplacer(Manager *mgr,
296 HomophoneReplacer::~HomophoneReplacer() = default; 296 HomophoneReplacer::~HomophoneReplacer() = default;
297 297
298 std::string HomophoneReplacer::Apply(const std::string &text) const { 298 std::string HomophoneReplacer::Apply(const std::string &text) const {
299 - return impl_->Apply(text); 299 + return RemoveInvalidUtf8Sequences(impl_->Apply(text));
300 } 300 }
301 301
302 #if __ANDROID_API__ >= 9 302 #if __ANDROID_API__ >= 9