Inverse text normalization API for other programming languages (#1019)

Fangjun Kuang · GitHub
Commit 6e09933d99c6c501f272b1a75dd2f8cfca17f150 6e09933d 1 parent b0f7ed3e
.github/scripts/test-dart.sh
.github/scripts/test-dot-net.sh
.github/scripts/test-nodejs-addon-npm.sh
.github/scripts/test-nodejs-npm.sh
.github/workflows/run-java-test.yaml
.github/workflows/test-dot-net.yaml
.github/workflows/test-go.yaml
.github/workflows/test-nodejs-addon-api.yaml
.gitignore
dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart
dart-api-examples/non-streaming-asr/run-paraformer-itn.sh
dotnet-examples/offline-decode-files/Program.cs
dotnet-examples/offline-decode-files/run-paraformer-itn.sh
dotnet-examples/offline-decode-files/run-telespeech-ctc.sh
go-api-examples/non-streaming-decode-files/main.go
go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh
go-api-examples/non-streaming-decode-files/run-telespeech-ctc.sh
java-api-examples/InverseTextNormalizationNonStreamingParaformer.java
java-api-examples/run-inverse-text-normalization-paraformer.sh
kotlin-api-examples/run.sh
--- a/.github/scripts/test-dart.sh
查看文件 @6e09933
+++ b/.github/scripts/test-dart.sh
查看文件 @6e09933
@@ -4,6 +4,41 @@ set -ex
 
 cd dart-api-examples
 
+ pushd non-streaming-asr
+ 
+ echo '----------paraformer itn----------'
+ ./run-paraformer-itn.sh
+ 
+ echo '----------paraformer----------'
+ ./run-paraformer.sh
+ rm -rf sherpa-onnx-*
+ 
+ echo '----------VAD with paraformer----------'
+ ./run-vad-with-paraformer.sh
+ rm -rf sherpa-onnx-*
+ 
+ echo '----------NeMo transducer----------'
+ ./run-nemo-transducer.sh
+ rm -rf sherpa-onnx-*
+ 
+ echo '----------NeMo CTC----------'
+ ./run-nemo-ctc.sh
+ rm -rf sherpa-onnx-*
+ 
+ echo '----------TeleSpeech CTC----------'
+ ./run-telespeech-ctc.sh
+ rm -rf sherpa-onnx-*
+ 
+ echo '----------whisper----------'
+ ./run-whisper.sh
+ rm -rf sherpa-onnx-*
+ 
+ echo '----------zipformer transducer----------'
+ ./run-zipformer-transducer.sh
+ rm -rf sherpa-onnx-*
+ 
+ popd # non-streaming-asr
+ 
 pushd tts
 
 echo '----------piper tts----------'
@@ -44,38 +79,6 @@ rm -rf sherpa-onnx-*
 
 popd # streaming-asr
 
- pushd non-streaming-asr
- 
- echo '----------VAD with paraformer----------'
- ./run-vad-with-paraformer.sh
- rm -rf sherpa-onnx-*
- 
- echo '----------NeMo transducer----------'
- ./run-nemo-transducer.sh
- rm -rf sherpa-onnx-*
- 
- echo '----------NeMo CTC----------'
- ./run-nemo-ctc.sh
- rm -rf sherpa-onnx-*
- 
- echo '----------TeleSpeech CTC----------'
- ./run-telespeech-ctc.sh
- rm -rf sherpa-onnx-*
- 
- echo '----------paraformer----------'
- ./run-paraformer.sh
- rm -rf sherpa-onnx-*
- 
- echo '----------whisper----------'
- ./run-whisper.sh
- rm -rf sherpa-onnx-*
- 
- echo '----------zipformer transducer----------'
- ./run-zipformer-transducer.sh
- rm -rf sherpa-onnx-*
- 
- popd # non-streaming-asr
- 
 pushd vad
 ./run.sh
 rm *.onnx
--- a/.github/scripts/test-dot-net.sh
查看文件 @6e09933
+++ b/.github/scripts/test-dot-net.sh
查看文件 @6e09933
@@ -3,6 +3,7 @@
 cd dotnet-examples/
 
 cd ./offline-decode-files
+ ./run-paraformer-itn.sh
 ./run-telespeech-ctc.sh
 ./run-nemo-ctc.sh
 ./run-paraformer.sh
--- a/.github/scripts/test-nodejs-addon-npm.sh
查看文件 @6e09933
+++ b/.github/scripts/test-nodejs-addon-npm.sh
查看文件 @6e09933
@@ -119,6 +119,12 @@ tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
 rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
 
 node ./test_asr_non_streaming_paraformer.js
+ 
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ 
+ node ./test_asr_non_streaming_paraformer_itn.js
+ 
 rm -rf sherpa-onnx-paraformer-zh-2023-03-28
 
 echo "----------tts----------"
--- a/.github/scripts/test-nodejs-npm.sh
查看文件 @6e09933
+++ b/.github/scripts/test-nodejs-npm.sh
查看文件 @6e09933
@@ -11,6 +11,15 @@ ls -lh node_modules
 
 # offline asr
 
+ curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ ls -lh
+ tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ node ./test-offline-paraformer-itn.js
+ rm -rf sherpa-onnx-paraformer-zh-2023-03-28
+ 
 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
 ls -lh
 tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
--- a/.github/workflows/run-java-test.yaml
查看文件 @6e09933
+++ b/.github/workflows/run-java-test.yaml
查看文件 @6e09933
@@ -190,6 +190,8 @@ jobs:
         shell: bash
         run: |
           cd ./java-api-examples
+           ./run-inverse-text-normalization-paraformer.sh
+ 
           ./run-non-streaming-decode-file-paraformer.sh
           rm -rf sherpa-onnx-paraformer-zh-*
 
--- a/.github/workflows/test-dot-net.yaml
查看文件 @6e09933
+++ b/.github/workflows/test-dot-net.yaml
查看文件 @6e09933
@@ -39,7 +39,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-         os: [ubuntu-latest, macos-latest, windows-latest]
+         os: [ubuntu-latest]
         python-version: ["3.8"]
 
     steps:
@@ -72,45 +72,18 @@ jobs:
 
           cmake --build . --target install --config Release
 
-       - name: Build sherpa-onnx for windows x86
-         if: matrix.os == 'windows-latest'
-         shell: bash
-         run: |
-           export CMAKE_CXX_COMPILER_LAUNCHER=ccache
-           export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
-           cmake --version
- 
-           mkdir build-win32
-           cd build-win32
-           cmake \
-             -A Win32 \
-             -DBUILD_SHARED_LIBS=ON \
-             -DCMAKE_INSTALL_PREFIX=./install \
-             -DCMAKE_BUILD_TYPE=Release \
-             -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-             -DBUILD_ESPEAK_NG_EXE=OFF \
-             -DSHERPA_ONNX_ENABLE_BINARY=ON \
-             ..
-           cmake --build . --target install --config Release
- 
       - uses: actions/upload-artifact@v4
         with:
           name: ${{ matrix.os }}
           path: ./build/install/lib/
 
-       - uses: actions/upload-artifact@v4
-         if: matrix.os == 'windows-latest'
-         with:
-           name: ${{ matrix.os }}-win32
-           path: ./build-win32/install/lib/
- 
   test-dot-net:
     runs-on: ${{ matrix.os }}
     needs: [build-libs]
     strategy:
       fail-fast: false
       matrix:
-         os: [ubuntu-latest, macos-latest] #, windows-latest]
+         os: [ubuntu-latest]
         python-version: ["3.8"]
 
     steps:
@@ -134,30 +107,11 @@ jobs:
           name: ubuntu-latest
           path: /tmp/linux
 
-       - name: Retrieve artifact from macos-latest
-         uses: actions/download-artifact@v4
-         with:
-           name: macos-latest
-           path: /tmp/macos
- 
-       - name: Retrieve artifact from windows-latest
-         uses: actions/download-artifact@v4
-         with:
-           name: windows-latest
-           path: /tmp/windows-x64
- 
-       - name: Retrieve artifact from windows-latest
-         uses: actions/download-artifact@v4
-         with:
-           name: windows-latest-win32
-           path: /tmp/windows-x86
- 
       - name: Setup .NET
         uses: actions/setup-dotnet@v4
         with:
           dotnet-version: |
             6.0.x
-             7.0.x
 
       - name: Check dotnet
         run: dotnet --info
@@ -171,15 +125,6 @@ jobs:
           echo "----------/tmp/linux----------"
           ls -lh /tmp/linux
 
-           echo "----------/tmp/macos----------"
-           ls -lh /tmp/macos
- 
-           echo "----------/tmp/windows-x64----------"
-           ls -lh /tmp/windows-x64
- 
-           echo "----------/tmp/windows-x86----------"
-           ls -lh /tmp/windows-x86
- 
       - name: Build
         shell: bash
         run: |
--- a/.github/workflows/test-go.yaml
查看文件 @6e09933
+++ b/.github/workflows/test-go.yaml
查看文件 @6e09933
@@ -127,7 +127,7 @@ jobs:
 
       - uses: actions/upload-artifact@v4
         with:
-           name: tts-waves
+           name: tts-waves-${{ matrix.os }}
           path: tts-waves
 
       - name: Test non-streaming decoding files (macOS)
@@ -154,6 +154,7 @@ jobs:
 
           echo "Test paraformer"
           ./run-paraformer.sh
+           ./run-paraformer-itn.sh
           rm -rf sherpa-onnx-paraformer-zh-2023-03-28
 
           echo "Test NeMo CTC"
--- a/.github/workflows/test-nodejs-addon-api.yaml
查看文件 @6e09933
+++ b/.github/workflows/test-nodejs-addon-api.yaml
查看文件 @6e09933
@@ -39,8 +39,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-         os: [macos-11, macos-14, ubuntu-20.04, ubuntu-22.04] #, windows-latest]
-         node-version: ["16", "17", "18", "19", "21", "22"]
+         os: [macos-latest, ubuntu-latest] # each runner listed once; a repeated entry only duplicates jobs
+         node-version: ["16", "22"]
         python-version: ["3.8"]
 
     steps:
--- a/.gitignore
查看文件 @6e09933
+++ b/.gitignore
查看文件 @6e09933
@@ -107,3 +107,4 @@ package-lock.json
 sherpa-onnx-nemo-*
 sherpa-onnx-vits-*
 sherpa-onnx-telespeech-ctc-*
+ *.fst
--- a/dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart 0 → 100644
查看文件 @6e09933
+++ b/dart-api-examples/non-streaming-asr/bin/paraformer-itn.dart 0 → 100644
查看文件 @6e09933
+ // Copyright (c)  2024  Xiaomi Corporation
+ import 'dart:io';
+ import 'dart:typed_data';
+ 
+ import 'package:args/args.dart';
+ import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
+ 
+ import './init.dart';
+ 
+ /// Decodes one wave file with a non-streaming paraformer model and prints the
+ /// recognized text. The rule FSTs given via `--rule-fsts` are passed to the
+ /// recognizer for inverse text normalization.
+ ///
+ /// Required command-line options: `--model`, `--tokens`, `--rule-fsts` and
+ /// `--input-wav`. If any of them is missing, the usage text is printed and the
+ /// process exits with status 1.
+ void main(List<String> arguments) async {
+   await initSherpaOnnx();
+ 
+   final parser = ArgParser()
+     ..addOption('model', help: 'Path to the paraformer model')
+     ..addOption('tokens', help: 'Path to tokens.txt')
+     ..addOption('rule-fsts',
+         help: 'Path to rule fsts for inverse text normalization')
+     ..addOption('input-wav', help: 'Path to input.wav to transcribe');
+ 
+   final res = parser.parse(arguments);
+   // Every option is mandatory: bail out with the usage text if one is absent.
+   if (res['model'] == null ||
+       res['tokens'] == null ||
+       res['rule-fsts'] == null ||
+       res['input-wav'] == null) {
+     print(parser.usage);
+     exit(1);
+   }
+ 
+   final model = res['model'] as String;
+   final tokens = res['tokens'] as String;
+   final ruleFsts = res['rule-fsts'] as String;
+   final inputWav = res['input-wav'] as String;
+ 
+   final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
+     model: model,
+   );
+ 
+   final modelConfig = sherpa_onnx.OfflineModelConfig(
+     paraformer: paraformer,
+     tokens: tokens,
+     debug: true,
+     numThreads: 1,
+     modelType: 'paraformer',
+   );
+   // ruleFsts is the only recognizer-level option set here besides the model.
+   final config = sherpa_onnx.OfflineRecognizerConfig(
+     model: modelConfig,
+     ruleFsts: ruleFsts,
+   );
+   final recognizer = sherpa_onnx.OfflineRecognizer(config);
+ 
+   final waveData = sherpa_onnx.readWave(inputWav);
+   final stream = recognizer.createStream();
+ 
+   // Feed the whole file at once, then decode it in a single pass.
+   stream.acceptWaveform(
+       samples: waveData.samples, sampleRate: waveData.sampleRate);
+   recognizer.decode(stream);
+ 
+   final result = recognizer.getResult(stream);
+   print(result.text);
+ 
+   // The stream and the recognizer are freed explicitly once the text is printed.
+   stream.free();
+   recognizer.free();
+ }
--- a/dart-api-examples/non-streaming-asr/run-paraformer-itn.sh 0 → 100755
查看文件 @6e09933
+++ b/dart-api-examples/non-streaming-asr/run-paraformer-itn.sh 0 → 100755
查看文件 @6e09933
+ #!/usr/bin/env bash
+ #
+ # Runs ./bin/paraformer-itn.dart on ./itn-zh-number.wav with the
+ # sherpa-onnx-paraformer-zh-2023-03-28 model and the rule FST
+ # ./itn_zh_number.fst.
+ #
+ # The model archive, the wave file and the rule FST are downloaded only when
+ # they are not already present in the current directory.
+ 
+ set -ex
+ 
+ dart pub get
+ 
+ # tokens.txt is used as the marker that the model has already been extracted.
+ if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ 
+   tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+   rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ fi
+ 
+ if [ ! -f ./itn-zh-number.wav ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ fi
+ 
+ if [ ! -f ./itn_zh_number.fst ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ 
+ dart run \
+   ./bin/paraformer-itn.dart \
+   --model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
+   --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
+   --rule-fsts ./itn_zh_number.fst \
+   --input-wav ./itn-zh-number.wav
--- a/dotnet-examples/offline-decode-files/Program.cs
查看文件 @6e09933
+++ b/dotnet-examples/offline-decode-files/Program.cs
查看文件 @6e09933
@@ -69,6 +69,10 @@ class OfflineDecodeFiles
             HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
     public string DecodingMethod { get; set; }
 
+     [Option("rule-fsts", Required = false, Default = "",
+             HelpText = "If not empty, path to rule fst for inverse text normalization")]
+     public string RuleFsts { get; set; }
+ 
     [Option("max-active-paths", Required = false, Default = 4,
         HelpText = @"Used only when --decoding--method is modified_beam_search.
 It specifies number of active paths to keep during the search")]
@@ -233,6 +237,7 @@ to download pre-trained Tdnn models.
     config.MaxActivePaths = options.MaxActivePaths;
     config.HotwordsFile = options.HotwordsFile;
     config.HotwordsScore = options.HotwordsScore;
+     config.RuleFsts = options.RuleFsts;
 
     config.ModelConfig.Debug = 0;
 
--- a/dotnet-examples/offline-decode-files/run-paraformer-itn.sh 0 → 100755
查看文件 @6e09933
+++ b/dotnet-examples/offline-decode-files/run-paraformer-itn.sh 0 → 100755
查看文件 @6e09933
+ #!/usr/bin/env bash
+ #
+ # Runs the offline-decode-files .NET example on ./itn-zh-number.wav with the
+ # sherpa-onnx-paraformer-zh-2023-03-28 model, passing ./itn_zh_number.fst
+ # through --rule-fsts.
+ #
+ # The model directory, the wave file and the rule FST are downloaded only when
+ # they are not already present in the current directory.
+ 
+ set -ex
+ 
+ if [ ! -d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+   tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+   rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ fi
+ 
+ if [ ! -f ./itn-zh-number.wav ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ fi
+ 
+ if [ ! -f ./itn_zh_number.fst ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ 
+ # Note: this example uses model.onnx rather than the int8 model.
+ dotnet run \
+   --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
+   --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
+   --rule-fsts=./itn_zh_number.fst \
+   --num-threads=2 \
+   --files ./itn-zh-number.wav
--- a/dotnet-examples/offline-decode-files/run-telespeech-ctc.sh
查看文件 @6e09933
+++ b/dotnet-examples/offline-decode-files/run-telespeech-ctc.sh
查看文件 @6e09933
@@ -11,5 +11,5 @@ fi
 dotnet run \
   --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
   --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
-   --model-type=telespeech-ctc \
+   --model-type=telespeech_ctc \
   --files ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav
--- a/go-api-examples/non-streaming-decode-files/main.go
查看文件 @6e09933
+++ b/go-api-examples/non-streaming-decode-files/main.go
查看文件 @6e09933
@@ -48,6 +48,8 @@ func main() {
 
 	flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
 	flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
+ 	flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
+ 	flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
 
 	flag.Parse()
 
--- a/go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh 0 → 100755
查看文件 @6e09933
+++ b/go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh 0 → 100755
查看文件 @6e09933
+ #!/usr/bin/env bash
+ #
+ # Builds the non-streaming-decode-files Go example and runs it on
+ # ./itn-zh-number.wav with the sherpa-onnx-paraformer-zh-2023-03-28 model,
+ # passing ./itn_zh_number.fst through --rule-fsts.
+ #
+ # The model directory, the wave file and the rule FST are downloaded only when
+ # they are not already present in the current directory.
+ 
+ set -ex
+ 
+ if [ ! -d sherpa-onnx-paraformer-zh-2023-03-28 ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+   tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+   rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ fi
+ 
+ if [ ! -f ./itn-zh-number.wav ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ fi
+ 
+ if [ ! -f ./itn_zh_number.fst ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ 
+ # Build the example binary in place before running it.
+ go mod tidy
+ go build
+ 
+ ./non-streaming-decode-files \
+   --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
+   --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
+   --model-type paraformer \
+   --rule-fsts ./itn_zh_number.fst \
+   --debug 0 \
+   ./itn-zh-number.wav
--- a/go-api-examples/non-streaming-decode-files/run-telespeech-ctc.sh
查看文件 @6e09933
+++ b/go-api-examples/non-streaming-decode-files/run-telespeech-ctc.sh
查看文件 @6e09933
@@ -14,6 +14,6 @@ go build
 ./non-streaming-decode-files \
   --telespeech-ctc ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
   --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
-   --model-type telespeech-ctc \
+   --model-type telespeech_ctc \
   --debug 0 \
   ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav
--- a/java-api-examples/InverseTextNormalizationNonStreamingParaformer.java 0 → 100644
查看文件 @6e09933
+++ b/java-api-examples/InverseTextNormalizationNonStreamingParaformer.java 0 → 100644
查看文件 @6e09933
+ // Copyright 2024 Xiaomi Corporation
+ 
+ // This file shows how to use an offline paraformer, i.e., non-streaming paraformer,
+ // to decode files with inverse text normalization.
+ import com.k2fsa.sherpa.onnx.*;
+ 
+ /**
+  * Example program: decodes {@code ./itn-zh-number.wav} with a non-streaming paraformer model and
+  * passes the rule FST {@code ./itn_zh_number.fst} to the recognizer for inverse text
+  * normalization.
+  *
+  * <p>All file paths are hard-coded relative to the current working directory.
+  */
+ public class InverseTextNormalizationNonStreamingParaformer {
+   /**
+    * Builds the recognizer, decodes the wave file once and prints the file name and the
+    * recognized text.
+    *
+    * @param args command-line arguments; not used
+    */
+   public static void main(String[] args) {
+     // please refer to
+     // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english
+     // to download model files
+     String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
+     String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
+ 
+     // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+     String waveFilename = "./itn-zh-number.wav";
+ 
+     // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+     String ruleFsts = "./itn_zh_number.fst";
+ 
+     WaveReader reader = new WaveReader(waveFilename);
+ 
+     OfflineParaformerModelConfig paraformer =
+         OfflineParaformerModelConfig.builder().setModel(model).build();
+ 
+     OfflineModelConfig modelConfig =
+         OfflineModelConfig.builder()
+             .setParaformer(paraformer)
+             .setTokens(tokens)
+             .setNumThreads(1)
+             .setDebug(true)
+             .build();
+ 
+     // The rule FST path is set on the recognizer config, not on the model config.
+     OfflineRecognizerConfig config =
+         OfflineRecognizerConfig.builder()
+             .setOfflineModelConfig(modelConfig)
+             .setDecodingMethod("greedy_search")
+             .setRuleFsts(ruleFsts)
+             .build();
+ 
+     OfflineRecognizer recognizer = new OfflineRecognizer(config);
+     OfflineStream stream = recognizer.createStream();
+     // The whole file is fed to the stream in one call before decoding.
+     stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
+ 
+     recognizer.decode(stream);
+ 
+     String text = recognizer.getResult(stream).getText();
+ 
+     System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
+ 
+     // The stream and the recognizer are released explicitly after the result is printed.
+     stream.release();
+     recognizer.release();
+   }
+ }
--- a/java-api-examples/run-inverse-text-normalization-paraformer.sh 0 → 100755
查看文件 @6e09933
+++ b/java-api-examples/run-inverse-text-normalization-paraformer.sh 0 → 100755
查看文件 @6e09933
+ #!/usr/bin/env bash
+ #
+ # Runs InverseTextNormalizationNonStreamingParaformer.java.
+ #
+ # Before running, the script makes sure that:
+ #   1. the JNI library exists in ../build/lib (built with cmake/make if not),
+ #   2. ../sherpa-onnx/java-api/build/sherpa-onnx.jar exists (built with make if not),
+ #   3. the paraformer model, the test wave and the rule FST are downloaded.
+ 
+ set -ex
+ 
+ # Build the JNI library only when neither the macOS (.dylib) nor the Linux (.so)
+ # variant is present.
+ if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib  && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+   mkdir -p ../build
+   pushd ../build
+   cmake \
+     -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+     -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+     -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+     -DBUILD_SHARED_LIBS=ON \
+     -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+     -DSHERPA_ONNX_ENABLE_JNI=ON \
+     ..
+ 
+   make -j4
+   ls -lh lib
+   popd
+ fi
+ 
+ if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+   pushd ../sherpa-onnx/java-api
+   make
+   popd
+ fi
+ 
+ # tokens.txt is used as the marker that the model has already been extracted.
+ if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ 
+   tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+   rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+ fi
+ 
+ if [ ! -f ./itn-zh-number.wav ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ fi
+ 
+ if [ ! -f ./itn_zh_number.fst ]; then
+   curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+ fi
+ 
+ # Quote the library path so a checkout directory containing spaces is not
+ # split into several arguments.
+ java \
+   -Djava.library.path="$PWD/../build/lib" \
+   -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+   InverseTextNormalizationNonStreamingParaformer.java
--- a/kotlin-api-examples/run.sh
查看文件 @6e09933
+++ b/kotlin-api-examples/run.sh
查看文件 @6e09933
@@ -203,6 +203,34 @@ function testOfflineAsr() {
   java -Djava.library.path=../build/lib -jar $out_filename
 }
 
+ function testInverseTextNormalizationAsr() {
+   if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
+     curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+     tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+     rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
+   fi
+ 
+   if [ ! -f ./itn-zh-number.wav ]; then
+     curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+   fi
+ 
+   if [ ! -f ./itn_zh_number.fst ]; then
+     curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+   fi
+ 
+   out_filename=test_itn_asr.jar  # named after the test_itn_asr.kt entry point compiled below
+   kotlinc-jvm -include-runtime -d $out_filename \
+     test_itn_asr.kt \
+     FeatureConfig.kt \
+     OfflineRecognizer.kt \
+     OfflineStream.kt \
+     WaveReader.kt \
+     faked-asset-manager.kt
+ 
+   ls -lh $out_filename
+   java -Djava.library.path=../build/lib -jar $out_filename
+ }
+ 
 function testPunctuation() {
   if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
     curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
@@ -229,3 +257,4 @@ testAudioTagging
 testSpokenLanguageIdentification
 testOfflineAsr
 testPunctuation
+ testInverseTextNormalizationAsr
--- a/kotlin-api-examples/test_itn_asr.kt 0 → 100644
查看文件 @6e09933
+++ b/kotlin-api-examples/test_itn_asr.kt 0 → 100644
查看文件 @6e09933
+ package com.k2fsa.sherpa.onnx
+ 
+ // Program entry point: delegates directly to test().
+ fun main() = test()
+ 
+ // Decodes ./itn-zh-number.wav with the recognizer built by
+ // createOfflineRecognizer() and prints the recognition result.
+ fun test() {
+   val waveFilename = "./itn-zh-number.wav"
+   val recognizer = createOfflineRecognizer()
+ 
+   // readWaveFromFile returns an array: index 0 is cast to the samples,
+   // index 1 to the sample rate.
+   val wave = WaveReader.readWaveFromFile(filename = waveFilename)
+   val samples = wave[0] as FloatArray
+   val sampleRate = wave[1] as Int
+ 
+   val stream = recognizer.createStream()
+   stream.acceptWaveform(samples, sampleRate = sampleRate)
+   recognizer.decode(stream)
+ 
+   println(recognizer.getResult(stream))
+ 
+   // Release the stream and the recognizer explicitly after printing.
+   stream.release()
+   recognizer.release()
+ }
+ 
+ // Builds an OfflineRecognizer that uses 16 kHz / 80-dim features, the offline
+ // model config of type 0, and ./itn_zh_number.fst as its rule FST.
+ // NOTE(review): type 0 presumably selects the paraformer-zh model; confirm
+ // against getOfflineModelConfig in OfflineRecognizer.kt.
+ fun createOfflineRecognizer(): OfflineRecognizer =
+   OfflineRecognizer(
+       config = OfflineRecognizerConfig(
+           featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
+           modelConfig = getOfflineModelConfig(0)!!,
+           ruleFsts = "./itn_zh_number.fst",
+       ),
+   )
+ 
--- a/nodejs-addon-examples/test_asr_non_streaming_paraformer_itn.js 0 → 100644
查看文件 @6e09933
+++ b/nodejs-addon-examples/test_asr_non_streaming_paraformer_itn.js 0 → 100644
查看文件 @6e09933
+ // Copyright (c)  2024  Xiaomi Corporation
+ const sherpa_onnx = require('sherpa-onnx-node');
+ 
+ // Please download test files from
+ // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+ const config = {
+   'featConfig': {
+     'sampleRate': 16000,
+     'featureDim': 80,
+   },
+   'modelConfig': {
+     'paraformer': {
+       'model': './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx',
+     },
+     'tokens': './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
+     'numThreads': 2,
+     'provider': 'cpu',
+     'debug': 1,
+   },
+   // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+   ruleFsts: './itn_zh_number.fst',
+ };
+ 
+ // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ const waveFilename = './itn-zh-number.wav';
+ 
+ const recognizer = new sherpa_onnx.OfflineRecognizer(config);
+ console.log('Started')
+ let start = Date.now();
+ const stream = recognizer.createStream();
+ const wave = sherpa_onnx.readWave(waveFilename);
+ stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
+ 
+ recognizer.decode(stream);
+ result = recognizer.getResult(stream)
+ let stop = Date.now();
+ console.log('Done')
+ 
+ const elapsed_seconds = (stop - start) / 1000;
+ const duration = wave.samples.length / wave.sampleRate;
+ const real_time_factor = elapsed_seconds / duration;
+ console.log('Wave duration', duration.toFixed(3), 'secodns')
+ console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
+ console.log(
+     `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+     real_time_factor.toFixed(3))
+ console.log(waveFilename)
+ console.log('result\n', result)
--- a/nodejs-examples/test-offline-paraformer-itn.js 0 → 100644
查看文件 @6e09933
+++ b/nodejs-examples/test-offline-paraformer-itn.js 0 → 100644
查看文件 @6e09933
+ // Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
+ 
+ const fs = require('fs');
+ const {Readable} = require('stream');
+ const wav = require('wav');
+ 
+ const sherpa_onnx = require('sherpa-onnx');
+ 
+ // Creates an offline (non-streaming) recognizer for the paraformer model in
+ // ./sherpa-onnx-paraformer-zh-2023-03-28 with ./itn_zh_number.fst set as the
+ // rule FST.
+ //
+ // Only the paraformer model path and tokens are filled in; the entries for
+ // the other model types are left as empty strings.
+ //
+ // Returns the object produced by sherpa_onnx.createOfflineRecognizer(config).
+ function createOfflineRecognizer() {
+   let featConfig = {
+     sampleRate: 16000,
+     featureDim: 80,
+   };
+ 
+   let modelConfig = {
+     transducer: {
+       encoder: '',
+       decoder: '',
+       joiner: '',
+     },
+     paraformer: {
+       model: './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx',
+     },
+     nemoCtc: {
+       model: '',
+     },
+     whisper: {
+       encoder: '',
+       decoder: '',
+       language: '',
+       task: '',
+       tailPaddings: -1,
+     },
+     tdnn: {
+       model: '',
+     },
+     tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
+     numThreads: 1,
+     debug: 0,
+     provider: 'cpu',
+     modelType: 'paraformer',
+   };
+ 
+   // No language model file is configured (empty model path).
+   let lmConfig = {
+     model: '',
+     scale: 1.0,
+   };
+ 
+   let config = {
+     featConfig: featConfig,
+     modelConfig: modelConfig,
+     lmConfig: lmConfig,
+     decodingMethod: 'greedy_search',
+     maxActivePaths: 4,
+     hotwordsFile: '',
+     hotwordsScore: 1.5,
+     // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
+     ruleFsts: './itn_zh_number.fst',
+   };
+ 
+   return sherpa_onnx.createOfflineRecognizer(config);
+ }
+ 
+ 
+ const recognizer = createOfflineRecognizer();
+ const stream = recognizer.createStream();
+ 
+ // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
+ const waveFilename = './itn-zh-number.wav';
+ 
+ const reader = new wav.Reader();
+ const readable = new Readable().wrap(reader);
+ const buf = [];
+ 
+ reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
+   if (sampleRate != recognizer.config.featConfig.sampleRate) {
+     throw new Error(`Only support sampleRate ${
+         recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
+   }
+ 
+   if (audioFormat != 1) {
+     throw new Error(`Only support PCM format. Given ${audioFormat}`);
+   }
+ 
+   if (channels != 1) {
+     throw new Error(`Only a single channel. Given ${channel}`);
+   }
+ 
+   if (bitDepth != 16) {
+     throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
+   }
+ });
+ 
+ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
+     .pipe(reader)
+     .on('finish', function(err) {
+       // tail padding
+       const floatSamples =
+           new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
+ 
+       buf.push(floatSamples);
+       const flattened =
+           Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
+ 
+       stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
+       recognizer.decode(stream);
+       const text = recognizer.getResult(stream).text;
+       console.log(text);
+ 
+       stream.free();
+       recognizer.free();
+     });
+ 
+ // Drain every chunk currently available from the wrapped wav reader, convert
+ // it from 16-bit integer samples to floats and append it to `buf`.
+ readable.on('readable', function() {
+   let chunk;
+   while ((chunk = readable.read()) != null) {
+     // View the chunk's bytes as 16-bit samples without copying them.
+     const int16Samples = new Int16Array(
+         chunk.buffer, chunk.byteOffset,
+         chunk.length / Int16Array.BYTES_PER_ELEMENT);
+ 
+     const floatSamples = new Float32Array(int16Samples.length);
+     for (let i = 0; i < floatSamples.length; i++) {
+       // Dividing by 32768 scales int16 values into [-1, 1).
+       floatSamples[i] = int16Samples[i] / 32768.0;
+     }
+ 
+     buf.push(floatSamples);
+   }
+ });
--- a/scripts/dotnet/OfflineRecognizerConfig.cs
查看文件 @6e09933
+++ b/scripts/dotnet/OfflineRecognizerConfig.cs
查看文件 @6e09933
@@ -21,7 +21,8 @@ namespace SherpaOnnx
             MaxActivePaths = 4;
             HotwordsFile = "";
             HotwordsScore = 1.5F;
- 
+             RuleFsts = "";
+             RuleFars = "";
         }
         public FeatureConfig FeatConfig;
         public OfflineModelConfig ModelConfig;
@@ -36,5 +37,11 @@ namespace SherpaOnnx
         public string HotwordsFile;
 
         public float HotwordsScore;
+ 
+         [MarshalAs(UnmanagedType.LPStr)]
+         public string RuleFsts;
+ 
+         [MarshalAs(UnmanagedType.LPStr)]
+         public string RuleFars;
     }
 }
--- a/scripts/go/_internal/non-streaming-decode-files/run-paraformer-itn.sh 0 → 120000
查看文件 @6e09933
+++ b/scripts/go/_internal/non-streaming-decode-files/run-paraformer-itn.sh 0 → 120000
查看文件 @6e09933
+ ../../../../go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh
\ No newline at end of file
--- a/scripts/go/sherpa_onnx.go
查看文件 @6e09933
+++ b/scripts/go/sherpa_onnx.go
查看文件 @6e09933
@@ -397,6 +397,10 @@ type OfflineRecognizerConfig struct {
 
 	// Used only when DecodingMethod is modified_beam_search.
 	MaxActivePaths int
+ 	HotwordsFile   string
+ 	HotwordsScore  float32
+ 	RuleFsts       string
+ 	RuleFars       string
 }
 
 // It wraps a pointer from C
@@ -491,6 +495,17 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
 
 	c.max_active_paths = C.int(config.MaxActivePaths)
 
+ 	c.hotwords_file = C.CString(config.HotwordsFile)
+ 	defer C.free(unsafe.Pointer(c.hotwords_file))
+ 
+ 	c.hotwords_score = C.float(config.HotwordsScore)
+ 
+ 	c.rule_fsts = C.CString(config.RuleFsts)
+ 	defer C.free(unsafe.Pointer(c.rule_fsts))
+ 
+ 	c.rule_fars = C.CString(config.RuleFars)
+ 	defer C.free(unsafe.Pointer(c.rule_fars))
+ 
 	recognizer := &OfflineRecognizer{}
 	recognizer.impl = C.CreateOfflineRecognizer(&c)
 
--- a/scripts/node-addon-api/README.md
查看文件 @6e09933
+++ b/scripts/node-addon-api/README.md
查看文件 @6e09933
@@ -15,8 +15,8 @@ cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
 make -j install
 export PKG_CONFIG_PATH=$PWD/install:$PKG_CONFIG_PATH
 cd ../scripts/node-addon-api/
- 
- ./node_modules/.bin/node-gyp build --verbose
+ npm i
+ ./node_modules/.bin/cmake-js compile --log-level verbose
 
 # see test/test_asr_streaming_transducer.js
 # for usages
--- a/scripts/node-addon-api/src/non-streaming-asr.cc
查看文件 @6e09933
+++ b/scripts/node-addon-api/src/non-streaming-asr.cc
查看文件 @6e09933
@@ -180,6 +180,8 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
   SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
   SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
   SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
+   SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
+   SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
 
   SherpaOnnxOfflineRecognizer *recognizer = CreateOfflineRecognizer(&c);
 
@@ -259,6 +261,14 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
     delete[] c.hotwords_file;
   }
 
+   if (c.rule_fsts) {
+     delete[] c.rule_fsts;
+   }
+ 
+   if (c.rule_fars) {
+     delete[] c.rule_fars;
+   }
+ 
   if (!recognizer) {
     Napi::TypeError::New(env, "Please check your config!")
         .ThrowAsJavaScriptException();
--- a/scripts/node-addon-api/src/non-streaming-tts.cc
查看文件 @6e09933
+++ b/scripts/node-addon-api/src/non-streaming-tts.cc
查看文件 @6e09933
@@ -44,7 +44,7 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig(
 
   c.vits = GetOfflineTtsVitsModelConfig(o);
 
-   SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, num_threads);
+   SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
 
   if (o.Has("debug") &&
       (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
--- a/sherpa-onnx/c-api/c-api.cc
查看文件 @6e09933
+++ b/sherpa-onnx/c-api/c-api.cc
查看文件 @6e09933
@@ -388,6 +388,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
   recognizer_config.hotwords_score =
       SHERPA_ONNX_OR(config->hotwords_score, 1.5);
 
+   recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
+   recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
+ 
   if (config->model_config.debug) {
     SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
   }
--- a/sherpa-onnx/c-api/c-api.h
查看文件 @6e09933
+++ b/sherpa-onnx/c-api/c-api.h
查看文件 @6e09933
@@ -411,6 +411,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
 
   /// Bonus score for each token in hotwords.
   float hotwords_score;
+   const char *rule_fsts;
+   const char *rule_fars;
 } SherpaOnnxOfflineRecognizerConfig;
 
 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer
--- a/sherpa-onnx/flutter/lib/src/offline_recognizer.dart
查看文件 @6e09933
+++ b/sherpa-onnx/flutter/lib/src/offline_recognizer.dart
查看文件 @6e09933
@@ -137,11 +137,13 @@ class OfflineRecognizerConfig {
     this.maxActivePaths = 4,
     this.hotwordsFile = '',
     this.hotwordsScore = 1.5,
+     this.ruleFsts = '',
+     this.ruleFars = '',
   });
 
   @override
   String toString() {
-     return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore)';
+     return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ruleFsts: $ruleFsts, ruleFars: $ruleFars)';
   }
 
   final FeatureConfig feat;
@@ -154,6 +156,9 @@ class OfflineRecognizerConfig {
   final String hotwordsFile;
 
   final double hotwordsScore;
+ 
+   final String ruleFsts;
+   final String ruleFars;
 }
 
 class OfflineRecognizerResult {
@@ -232,8 +237,13 @@ class OfflineRecognizer {
     c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8();
     c.ref.hotwordsScore = config.hotwordsScore;
 
+     c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
+     c.ref.ruleFars = config.ruleFars.toNativeUtf8();
+ 
     final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr;
 
+     calloc.free(c.ref.ruleFars);
+     calloc.free(c.ref.ruleFsts);
     calloc.free(c.ref.hotwordsFile);
     calloc.free(c.ref.decodingMethod);
     calloc.free(c.ref.lm.model);
--- a/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart
查看文件 @6e09933
+++ b/sherpa-onnx/flutter/lib/src/sherpa_onnx_bindings.dart
查看文件 @6e09933
@@ -130,6 +130,9 @@ final class SherpaOnnxOfflineRecognizerConfig extends Struct {
 
   @Float()
   external double hotwordsScore;
+ 
+   external Pointer<Utf8> ruleFsts;
+   external Pointer<Utf8> ruleFars;
 }
 
 final class SherpaOnnxOnlineTransducerModelConfig extends Struct {
--- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java
查看文件 @6e09933
+++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineRecognizerConfig.java
查看文件 @6e09933
@@ -9,6 +9,8 @@ public class OfflineRecognizerConfig {
     private final int maxActivePaths;
     private final String hotwordsFile;
     private final float hotwordsScore;
+     private final String ruleFsts;
+     private final String ruleFars;
 
     private OfflineRecognizerConfig(Builder builder) {
         this.featConfig = builder.featConfig;
@@ -17,6 +19,8 @@ public class OfflineRecognizerConfig {
         this.maxActivePaths = builder.maxActivePaths;
         this.hotwordsFile = builder.hotwordsFile;
         this.hotwordsScore = builder.hotwordsScore;
+         this.ruleFsts = builder.ruleFsts;
+         this.ruleFars = builder.ruleFars;
     }
 
     public static Builder builder() {
@@ -34,6 +38,8 @@ public class OfflineRecognizerConfig {
         private int maxActivePaths = 4;
         private String hotwordsFile = "";
         private float hotwordsScore = 1.5f;
+         private String ruleFsts = "";
+         private String ruleFars = "";
 
         public OfflineRecognizerConfig build() {
             return new OfflineRecognizerConfig(this);
@@ -68,5 +74,15 @@ public class OfflineRecognizerConfig {
             this.hotwordsScore = hotwordsScore;
             return this;
         }
+ 
+         public Builder setRuleFsts(String ruleFsts) {
+             this.ruleFsts = ruleFsts;
+             return this;
+         }
+ 
+         public Builder setRuleFars(String ruleFars) {
+             this.ruleFars = ruleFars;
+             return this;
+         }
     }
 }
--- a/sherpa-onnx/jni/offline-recognizer.cc
查看文件 @6e09933
+++ b/sherpa-onnx/jni/offline-recognizer.cc
查看文件 @6e09933
@@ -34,6 +34,18 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
   fid = env->GetFieldID(cls, "hotwordsScore", "F");
   ans.hotwords_score = env->GetFloatField(config, fid);
 
+   fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;");
+   s = (jstring)env->GetObjectField(config, fid);
+   p = env->GetStringUTFChars(s, nullptr);
+   ans.rule_fsts = p;
+   env->ReleaseStringUTFChars(s, p);
+ 
+   fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");
+   s = (jstring)env->GetObjectField(config, fid);
+   p = env->GetStringUTFChars(s, nullptr);
+   ans.rule_fars = p;
+   env->ReleaseStringUTFChars(s, p);
+ 
   //---------- feat config ----------
   fid = env->GetFieldID(cls, "featConfig",
                         "Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
--- a/sherpa-onnx/kotlin-api/OfflineRecognizer.kt
查看文件 @6e09933
+++ b/sherpa-onnx/kotlin-api/OfflineRecognizer.kt
查看文件 @6e09933
@@ -53,6 +53,8 @@ data class OfflineRecognizerConfig(
     var maxActivePaths: Int = 4,
     var hotwordsFile: String = "",
     var hotwordsScore: Float = 1.5f,
+     var ruleFsts: String = "",
+     var ruleFars: String = "",
 )
 
 class OfflineRecognizer(
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @6e09933
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @6e09933
@@ -387,7 +387,9 @@ func sherpaOnnxOfflineRecognizerConfig(
   decodingMethod: String = "greedy_search",
   maxActivePaths: Int = 4,
   hotwordsFile: String = "",
-   hotwordsScore: Float = 1.5
+   hotwordsScore: Float = 1.5,
+   ruleFsts: String = "",
+   ruleFars: String = ""
 ) -> SherpaOnnxOfflineRecognizerConfig {
   return SherpaOnnxOfflineRecognizerConfig(
     feat_config: featConfig,
@@ -396,7 +398,9 @@ func sherpaOnnxOfflineRecognizerConfig(
     decoding_method: toCPointer(decodingMethod),
     max_active_paths: Int32(maxActivePaths),
     hotwords_file: toCPointer(hotwordsFile),
-     hotwords_score: hotwordsScore
+     hotwords_score: hotwordsScore,
+     rule_fsts: toCPointer(ruleFsts),
+     rule_fars: toCPointer(ruleFars)
   )
 }
 
--- a/wasm/asr/sherpa-onnx-asr.js
查看文件 @6e09933
+++ b/wasm/asr/sherpa-onnx-asr.js
查看文件 @6e09933
@@ -628,7 +628,7 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
   const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
   const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
 
-   const len = feat.len + model.len + lm.len + 4 * 4;
+   const len = feat.len + model.len + lm.len + 6 * 4;
   const ptr = Module._malloc(len);
 
   let offset = 0;
@@ -643,7 +643,10 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
 
   const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
   const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
-   const bufferLen = decodingMethodLen + hotwordsFileLen;
+   const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
+   const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
+   const bufferLen =
+       decodingMethodLen + hotwordsFileLen + ruleFstsLen + ruleFarsLen;
   const buffer = Module._malloc(bufferLen);
 
   offset = 0;
@@ -651,6 +654,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
   offset += decodingMethodLen;
 
   Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
+   offset += hotwordsFileLen;
+ 
+   Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
+   offset += ruleFstsLen;
+ 
+   Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsLen);
+   offset += ruleFarsLen;
 
   offset = feat.len + model.len + lm.len;
 
@@ -666,6 +676,15 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
   Module.setValue(ptr + offset, config.hotwordsScore, 'float');
   offset += 4;
 
+   Module.setValue(
+       ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
+   offset += 4;
+ 
+   Module.setValue(
+       ptr + offset, buffer + decodingMethodLen + hotwordsFileLen + ruleFstsLen,
+       'i8*');
+   offset += 4;
+ 
   return {
     buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
   }
--- a/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
查看文件 @6e09933
+++ b/wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
查看文件 @6e09933
@@ -29,7 +29,7 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
 static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
                   sizeof(SherpaOnnxFeatureConfig) +
                       sizeof(SherpaOnnxOfflineLMConfig) +
-                       sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4,
+                       sizeof(SherpaOnnxOfflineModelConfig) + 6 * 4,
               "");
 
 void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
@@ -103,6 +103,8 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
   fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
   fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
   fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
+   fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
+   fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
 }
 
 void CopyHeap(const char *src, int32_t num_bytes, char *dst) {