Fangjun Kuang
Committed by GitHub

Inverse text normalization API for other programming languages (#1019)

正在显示 39 个修改的文件 包含 669 行增加104 行删除
@@ -4,6 +4,41 @@ set -ex @@ -4,6 +4,41 @@ set -ex
4 4
5 cd dart-api-examples 5 cd dart-api-examples
6 6
  7 +pushd non-streaming-asr
  8 +
  9 +echo '----------paraformer itn----------'
  10 +./run-paraformer-itn.sh
  11 +
  12 +echo '----------paraformer----------'
  13 +./run-paraformer.sh
  14 +rm -rf sherpa-onnx-*
  15 +
  16 +echo '----------VAD with paraformer----------'
  17 +./run-vad-with-paraformer.sh
  18 +rm -rf sherpa-onnx-*
  19 +
  20 +echo '----------NeMo transducer----------'
  21 +./run-nemo-transducer.sh
  22 +rm -rf sherpa-onnx-*
  23 +
  24 +echo '----------NeMo CTC----------'
  25 +./run-nemo-ctc.sh
  26 +rm -rf sherpa-onnx-*
  27 +
  28 +echo '----------TeleSpeech CTC----------'
  29 +./run-telespeech-ctc.sh
  30 +rm -rf sherpa-onnx-*
  31 +
  32 +echo '----------whisper----------'
  33 +./run-whisper.sh
  34 +rm -rf sherpa-onnx-*
  35 +
  36 +echo '----------zipformer transducer----------'
  37 +./run-zipformer-transducer.sh
  38 +rm -rf sherpa-onnx-*
  39 +
  40 +popd # non-streaming-asr
  41 +
7 pushd tts 42 pushd tts
8 43
9 echo '----------piper tts----------' 44 echo '----------piper tts----------'
@@ -44,38 +79,6 @@ rm -rf sherpa-onnx-* @@ -44,38 +79,6 @@ rm -rf sherpa-onnx-*
44 79
45 popd # streaming-asr 80 popd # streaming-asr
46 81
47 -pushd non-streaming-asr  
48 -  
49 -echo '----------VAD with paraformer----------'  
50 -./run-vad-with-paraformer.sh  
51 -rm -rf sherpa-onnx-*  
52 -  
53 -echo '----------NeMo transducer----------'  
54 -./run-nemo-transducer.sh  
55 -rm -rf sherpa-onnx-*  
56 -  
57 -echo '----------NeMo CTC----------'  
58 -./run-nemo-ctc.sh  
59 -rm -rf sherpa-onnx-*  
60 -  
61 -echo '----------TeleSpeech CTC----------'  
62 -./run-telespeech-ctc.sh  
63 -rm -rf sherpa-onnx-*  
64 -  
65 -echo '----------paraformer----------'  
66 -./run-paraformer.sh  
67 -rm -rf sherpa-onnx-*  
68 -  
69 -echo '----------whisper----------'  
70 -./run-whisper.sh  
71 -rm -rf sherpa-onnx-*  
72 -  
73 -echo '----------zipformer transducer----------'  
74 -./run-zipformer-transducer.sh  
75 -rm -rf sherpa-onnx-*  
76 -  
77 -popd # non-streaming-asr  
78 -  
79 pushd vad 82 pushd vad
80 ./run.sh 83 ./run.sh
81 rm *.onnx 84 rm *.onnx
@@ -3,6 +3,7 @@ @@ -3,6 +3,7 @@
3 cd dotnet-examples/ 3 cd dotnet-examples/
4 4
5 cd ./offline-decode-files 5 cd ./offline-decode-files
  6 +./run-paraformer-itn.sh
6 ./run-telespeech-ctc.sh 7 ./run-telespeech-ctc.sh
7 ./run-nemo-ctc.sh 8 ./run-nemo-ctc.sh
8 ./run-paraformer.sh 9 ./run-paraformer.sh
@@ -119,6 +119,12 @@ tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 @@ -119,6 +119,12 @@ tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
119 rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2 119 rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
120 120
121 node ./test_asr_non_streaming_paraformer.js 121 node ./test_asr_non_streaming_paraformer.js
  122 +
  123 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  124 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  125 +
  126 +node ./test_asr_non_streaming_paraformer_itn.js
  127 +
122 rm -rf sherpa-onnx-paraformer-zh-2023-03-28 128 rm -rf sherpa-onnx-paraformer-zh-2023-03-28
123 129
124 echo "----------tts----------" 130 echo "----------tts----------"
@@ -11,6 +11,15 @@ ls -lh node_modules @@ -11,6 +11,15 @@ ls -lh node_modules
11 11
12 # offline asr 12 # offline asr
13 13
  14 +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  15 +ls -lh
  16 +tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  17 +rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  18 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  19 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  20 +node ./test-offline-paraformer-itn.js
  21 +rm -rf sherpa-onnx-paraformer-zh-2023-03-28
  22 +
14 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 23 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
15 ls -lh 24 ls -lh
16 tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2 25 tar xvf sherpa-onnx-nemo-ctc-en-conformer-small.tar.bz2
@@ -190,6 +190,8 @@ jobs: @@ -190,6 +190,8 @@ jobs:
190 shell: bash 190 shell: bash
191 run: | 191 run: |
192 cd ./java-api-examples 192 cd ./java-api-examples
  193 + ./run-inverse-text-normalization-paraformer.sh
  194 +
193 ./run-non-streaming-decode-file-paraformer.sh 195 ./run-non-streaming-decode-file-paraformer.sh
194 rm -rf sherpa-onnx-paraformer-zh-* 196 rm -rf sherpa-onnx-paraformer-zh-*
195 197
@@ -39,7 +39,7 @@ jobs: @@ -39,7 +39,7 @@ jobs:
39 strategy: 39 strategy:
40 fail-fast: false 40 fail-fast: false
41 matrix: 41 matrix:
42 - os: [ubuntu-latest, macos-latest, windows-latest] 42 + os: [ubuntu-latest]
43 python-version: ["3.8"] 43 python-version: ["3.8"]
44 44
45 steps: 45 steps:
@@ -72,45 +72,18 @@ jobs: @@ -72,45 +72,18 @@ jobs:
72 72
73 cmake --build . --target install --config Release 73 cmake --build . --target install --config Release
74 74
75 - - name: Build sherpa-onnx for windows x86  
76 - if: matrix.os == 'windows-latest'  
77 - shell: bash  
78 - run: |  
79 - export CMAKE_CXX_COMPILER_LAUNCHER=ccache  
80 - export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"  
81 - cmake --version  
82 -  
83 - mkdir build-win32  
84 - cd build-win32  
85 - cmake \  
86 - -A Win32 \  
87 - -DBUILD_SHARED_LIBS=ON \  
88 - -DCMAKE_INSTALL_PREFIX=./install \  
89 - -DCMAKE_BUILD_TYPE=Release \  
90 - -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \  
91 - -DBUILD_ESPEAK_NG_EXE=OFF \  
92 - -DSHERPA_ONNX_ENABLE_BINARY=ON \  
93 - ..  
94 - cmake --build . --target install --config Release  
95 -  
96 - uses: actions/upload-artifact@v4 75 - uses: actions/upload-artifact@v4
97 with: 76 with:
98 name: ${{ matrix.os }} 77 name: ${{ matrix.os }}
99 path: ./build/install/lib/ 78 path: ./build/install/lib/
100 79
101 - - uses: actions/upload-artifact@v4  
102 - if: matrix.os == 'windows-latest'  
103 - with:  
104 - name: ${{ matrix.os }}-win32  
105 - path: ./build-win32/install/lib/  
106 -  
107 test-dot-net: 80 test-dot-net:
108 runs-on: ${{ matrix.os }} 81 runs-on: ${{ matrix.os }}
109 needs: [build-libs] 82 needs: [build-libs]
110 strategy: 83 strategy:
111 fail-fast: false 84 fail-fast: false
112 matrix: 85 matrix:
113 - os: [ubuntu-latest, macos-latest] #, windows-latest] 86 + os: [ubuntu-latest]
114 python-version: ["3.8"] 87 python-version: ["3.8"]
115 88
116 steps: 89 steps:
@@ -134,30 +107,11 @@ jobs: @@ -134,30 +107,11 @@ jobs:
134 name: ubuntu-latest 107 name: ubuntu-latest
135 path: /tmp/linux 108 path: /tmp/linux
136 109
137 - - name: Retrieve artifact from macos-latest  
138 - uses: actions/download-artifact@v4  
139 - with:  
140 - name: macos-latest  
141 - path: /tmp/macos  
142 -  
143 - - name: Retrieve artifact from windows-latest  
144 - uses: actions/download-artifact@v4  
145 - with:  
146 - name: windows-latest  
147 - path: /tmp/windows-x64  
148 -  
149 - - name: Retrieve artifact from windows-latest  
150 - uses: actions/download-artifact@v4  
151 - with:  
152 - name: windows-latest-win32  
153 - path: /tmp/windows-x86  
154 -  
155 - name: Setup .NET 110 - name: Setup .NET
156 uses: actions/setup-dotnet@v4 111 uses: actions/setup-dotnet@v4
157 with: 112 with:
158 dotnet-version: | 113 dotnet-version: |
159 6.0.x 114 6.0.x
160 - 7.0.x  
161 115
162 - name: Check dotnet 116 - name: Check dotnet
163 run: dotnet --info 117 run: dotnet --info
@@ -171,15 +125,6 @@ jobs: @@ -171,15 +125,6 @@ jobs:
171 echo "----------/tmp/linux----------" 125 echo "----------/tmp/linux----------"
172 ls -lh /tmp/linux 126 ls -lh /tmp/linux
173 127
174 - echo "----------/tmp/macos----------"  
175 - ls -lh /tmp/macos  
176 -  
177 - echo "----------/tmp/windows-x64----------"  
178 - ls -lh /tmp/windows-x64  
179 -  
180 - echo "----------/tmp/windows-x86----------"  
181 - ls -lh /tmp/windows-x86  
182 -  
183 - name: Build 128 - name: Build
184 shell: bash 129 shell: bash
185 run: | 130 run: |
@@ -127,7 +127,7 @@ jobs: @@ -127,7 +127,7 @@ jobs:
127 127
128 - uses: actions/upload-artifact@v4 128 - uses: actions/upload-artifact@v4
129 with: 129 with:
130 - name: tts-waves 130 + name: tts-waves-${{ matrix.os }}
131 path: tts-waves 131 path: tts-waves
132 132
133 - name: Test non-streaming decoding files (macOS) 133 - name: Test non-streaming decoding files (macOS)
@@ -154,6 +154,7 @@ jobs: @@ -154,6 +154,7 @@ jobs:
154 154
155 echo "Test paraformer" 155 echo "Test paraformer"
156 ./run-paraformer.sh 156 ./run-paraformer.sh
  157 + ./run-paraformer-itn.sh
157 rm -rf sherpa-onnx-paraformer-zh-2023-03-28 158 rm -rf sherpa-onnx-paraformer-zh-2023-03-28
158 159
159 echo "Test NeMo CTC" 160 echo "Test NeMo CTC"
@@ -39,8 +39,8 @@ jobs: @@ -39,8 +39,8 @@ jobs:
39 strategy: 39 strategy:
40 fail-fast: false 40 fail-fast: false
41 matrix: 41 matrix:
42 - os: [macos-11, macos-14, ubuntu-20.04, ubuntu-22.04] #, windows-latest]  
43 - node-version: ["16", "17", "18", "19", "21", "22"] 42 + os: [macos-latest, ubuntu-latest, ubuntu-latest]
  43 + node-version: ["16", "22"]
44 python-version: ["3.8"] 44 python-version: ["3.8"]
45 45
46 steps: 46 steps:
@@ -107,3 +107,4 @@ package-lock.json @@ -107,3 +107,4 @@ package-lock.json
107 sherpa-onnx-nemo-* 107 sherpa-onnx-nemo-*
108 sherpa-onnx-vits-* 108 sherpa-onnx-vits-*
109 sherpa-onnx-telespeech-ctc-* 109 sherpa-onnx-telespeech-ctc-*
  110 +*.fst
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +import 'dart:io';
  3 +import 'dart:typed_data';
  4 +
  5 +import 'package:args/args.dart';
  6 +import 'package:sherpa_onnx/sherpa_onnx.dart' as sherpa_onnx;
  7 +
  8 +import './init.dart';
  9 +
  10 +void main(List<String> arguments) async {
  11 + await initSherpaOnnx();
  12 +
  13 + final parser = ArgParser()
  14 + ..addOption('model', help: 'Path to the paraformer model')
  15 + ..addOption('tokens', help: 'Path to tokens.txt')
  16 + ..addOption('rule-fsts',
  17 + help: 'Path to rule fsts for inverse text normalization')
  18 + ..addOption('input-wav', help: 'Path to input.wav to transcribe');
  19 +
  20 + final res = parser.parse(arguments);
  21 + if (res['model'] == null ||
  22 + res['tokens'] == null ||
  23 + res['rule-fsts'] == null ||
  24 + res['input-wav'] == null) {
  25 + print(parser.usage);
  26 + exit(1);
  27 + }
  28 +
  29 + final model = res['model'] as String;
  30 + final tokens = res['tokens'] as String;
  31 + final ruleFsts = res['rule-fsts'] as String;
  32 + final inputWav = res['input-wav'] as String;
  33 +
  34 + final paraformer = sherpa_onnx.OfflineParaformerModelConfig(
  35 + model: model,
  36 + );
  37 +
  38 + final modelConfig = sherpa_onnx.OfflineModelConfig(
  39 + paraformer: paraformer,
  40 + tokens: tokens,
  41 + debug: true,
  42 + numThreads: 1,
  43 + modelType: 'paraformer',
  44 + );
  45 + final config = sherpa_onnx.OfflineRecognizerConfig(
  46 + model: modelConfig,
  47 + ruleFsts: ruleFsts,
  48 + );
  49 + final recognizer = sherpa_onnx.OfflineRecognizer(config);
  50 +
  51 + final waveData = sherpa_onnx.readWave(inputWav);
  52 + final stream = recognizer.createStream();
  53 +
  54 + stream.acceptWaveform(
  55 + samples: waveData.samples, sampleRate: waveData.sampleRate);
  56 + recognizer.decode(stream);
  57 +
  58 + final result = recognizer.getResult(stream);
  59 + print(result.text);
  60 +
  61 + stream.free();
  62 + recognizer.free();
  63 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +dart pub get
  6 +
  7 +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  8 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  9 +
  10 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  11 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  12 +fi
  13 +
  14 +if [ ! -f ./itn-zh-number.wav ]; then
  15 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  16 +fi
  17 +
  18 +if [ ! -f ./itn_zh_number.fst ]; then
  19 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  20 +fi
  21 +
  22 +dart run \
  23 + ./bin/paraformer-itn.dart \
  24 + --model ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
  25 + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  26 + --rule-fsts ./itn_zh_number.fst \
  27 + --input-wav ./itn-zh-number.wav
@@ -69,6 +69,10 @@ class OfflineDecodeFiles @@ -69,6 +69,10 @@ class OfflineDecodeFiles
69 HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] 69 HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")]
70 public string DecodingMethod { get; set; } 70 public string DecodingMethod { get; set; }
71 71
  72 + [Option("rule-fsts", Required = false, Default = "",
  73 + HelpText = "If not empty, path to rule fst for inverse text normalization")]
  74 + public string RuleFsts { get; set; }
  75 +
72 [Option("max-active-paths", Required = false, Default = 4, 76 [Option("max-active-paths", Required = false, Default = 4,
73 HelpText = @"Used only when --decoding--method is modified_beam_search. 77 HelpText = @"Used only when --decoding--method is modified_beam_search.
74 It specifies number of active paths to keep during the search")] 78 It specifies number of active paths to keep during the search")]
@@ -233,6 +237,7 @@ to download pre-trained Tdnn models. @@ -233,6 +237,7 @@ to download pre-trained Tdnn models.
233 config.MaxActivePaths = options.MaxActivePaths; 237 config.MaxActivePaths = options.MaxActivePaths;
234 config.HotwordsFile = options.HotwordsFile; 238 config.HotwordsFile = options.HotwordsFile;
235 config.HotwordsScore = options.HotwordsScore; 239 config.HotwordsScore = options.HotwordsScore;
  240 + config.RuleFsts = options.RuleFsts;
236 241
237 config.ModelConfig.Debug = 0; 242 config.ModelConfig.Debug = 0;
238 243
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  7 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  8 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  9 +fi
  10 +
  11 +if [ ! -f ./itn-zh-number.wav ]; then
  12 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  13 +fi
  14 +
  15 +if [ ! -f ./itn_zh_number.fst ]; then
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  17 +fi
  18 +
  19 +dotnet run \
  20 + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  21 + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \
  22 + --rule-fsts=./itn_zh_number.fst \
  23 + --num-threads=2 \
  24 + --files ./itn-zh-number.wav
@@ -11,5 +11,5 @@ fi @@ -11,5 +11,5 @@ fi
11 dotnet run \ 11 dotnet run \
12 --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \ 12 --telespeech-ctc=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
13 --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \ 13 --tokens=./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
14 - --model-type=telespeech-ctc \ 14 + --model-type=telespeech_ctc \
15 --files ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav 15 --files ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav
@@ -48,6 +48,8 @@ func main() { @@ -48,6 +48,8 @@ func main() {
48 48
49 flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search") 49 flag.StringVar(&config.DecodingMethod, "decoding-method", "greedy_search", "Decoding method. Possible values: greedy_search, modified_beam_search")
50 flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") 50 flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
  51 + flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
  52 + flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
51 53
52 flag.Parse() 54 flag.Parse()
53 55
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [ ! -d sherpa-onnx-paraformer-zh-2023-03-28 ]; then
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  7 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  8 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  9 +fi
  10 +
  11 +if [ ! -f ./itn-zh-number.wav ]; then
  12 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  13 +fi
  14 +
  15 +if [ ! -f ./itn_zh_number.fst ]; then
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  17 +fi
  18 +
  19 +go mod tidy
  20 +go build
  21 +
  22 +./non-streaming-decode-files \
  23 + --paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
  24 + --tokens ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \
  25 + --model-type paraformer \
  26 + --rule-fsts ./itn_zh_number.fst \
  27 + --debug 0 \
  28 + ./itn-zh-number.wav
@@ -14,6 +14,6 @@ go build @@ -14,6 +14,6 @@ go build
14 ./non-streaming-decode-files \ 14 ./non-streaming-decode-files \
15 --telespeech-ctc ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \ 15 --telespeech-ctc ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/model.int8.onnx \
16 --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \ 16 --tokens ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/tokens.txt \
17 - --model-type telespeech-ctc \ 17 + --model-type telespeech_ctc \
18 --debug 0 \ 18 --debug 0 \
19 ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav 19 ./sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04/test_wavs/3-sichuan.wav
  1 +// Copyright 2024 Xiaomi Corporation
  2 +
  3 +// This file shows how to use an offline paraformer, i.e., non-streaming paraformer,
  4 +// to decode files with inverse text normalization.
  5 +import com.k2fsa.sherpa.onnx.*;
  6 +
  7 +public class InverseTextNormalizationNonStreamingParaformer {
  8 + public static void main(String[] args) {
  9 + // please refer to
  10 + // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-paraformer-zh-2023-03-28-chinese-english
  11 + // to download model files
  12 + String model = "./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx";
  13 + String tokens = "./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt";
  14 +
  15 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  16 + String waveFilename = "./itn-zh-number.wav";
  17 +
  18 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  19 + String ruleFsts = "./itn_zh_number.fst";
  20 +
  21 + WaveReader reader = new WaveReader(waveFilename);
  22 +
  23 + OfflineParaformerModelConfig paraformer =
  24 + OfflineParaformerModelConfig.builder().setModel(model).build();
  25 +
  26 + OfflineModelConfig modelConfig =
  27 + OfflineModelConfig.builder()
  28 + .setParaformer(paraformer)
  29 + .setTokens(tokens)
  30 + .setNumThreads(1)
  31 + .setDebug(true)
  32 + .build();
  33 +
  34 + OfflineRecognizerConfig config =
  35 + OfflineRecognizerConfig.builder()
  36 + .setOfflineModelConfig(modelConfig)
  37 + .setDecodingMethod("greedy_search")
  38 + .setRuleFsts(ruleFsts)
  39 + .build();
  40 +
  41 + OfflineRecognizer recognizer = new OfflineRecognizer(config);
  42 + OfflineStream stream = recognizer.createStream();
  43 + stream.acceptWaveform(reader.getSamples(), reader.getSampleRate());
  44 +
  45 + recognizer.decode(stream);
  46 +
  47 + String text = recognizer.getResult(stream).getText();
  48 +
  49 + System.out.printf("filename:%s\nresult:%s\n", waveFilename, text);
  50 +
  51 + stream.release();
  52 + recognizer.release();
  53 + }
  54 +}
  1 +#!/usr/bin/env bash
  2 +
  3 +set -ex
  4 +
  5 +if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
  6 + mkdir -p ../build
  7 + pushd ../build
  8 + cmake \
  9 + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  10 + -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  11 + -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  12 + -DBUILD_SHARED_LIBS=ON \
  13 + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  14 + -DSHERPA_ONNX_ENABLE_JNI=ON \
  15 + ..
  16 +
  17 + make -j4
  18 + ls -lh lib
  19 + popd
  20 +fi
  21 +
  22 +if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
  23 + pushd ../sherpa-onnx/java-api
  24 + make
  25 + popd
  26 +fi
  27 +
  28 +if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  29 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  30 +
  31 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  32 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  33 +fi
  34 +
  35 +if [ ! -f ./itn-zh-number.wav ]; then
  36 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  37 +fi
  38 +
  39 +if [ ! -f ./itn_zh_number.fst ]; then
  40 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  41 +fi
  42 +
  43 +java \
  44 + -Djava.library.path=$PWD/../build/lib \
  45 + -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
  46 + InverseTextNormalizationNonStreamingParaformer.java
@@ -203,6 +203,34 @@ function testOfflineAsr() { @@ -203,6 +203,34 @@ function testOfflineAsr() {
203 java -Djava.library.path=../build/lib -jar $out_filename 203 java -Djava.library.path=../build/lib -jar $out_filename
204 } 204 }
205 205
  206 +function testInverseTextNormalizationAsr() {
  207 + if [ ! -f ./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt ]; then
  208 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  209 + tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  210 + rm sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
  211 + fi
  212 +
  213 + if [ ! -f ./itn-zh-number.wav ]; then
  214 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  215 + fi
  216 +
  217 + if [ ! -f ./itn_zh_number.fst ]; then
  218 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  219 + fi
  220 +
  221 + out_filename=test_offline_asr.jar
  222 + kotlinc-jvm -include-runtime -d $out_filename \
  223 + test_itn_asr.kt \
  224 + FeatureConfig.kt \
  225 + OfflineRecognizer.kt \
  226 + OfflineStream.kt \
  227 + WaveReader.kt \
  228 + faked-asset-manager.kt
  229 +
  230 + ls -lh $out_filename
  231 + java -Djava.library.path=../build/lib -jar $out_filename
  232 +}
  233 +
206 function testPunctuation() { 234 function testPunctuation() {
207 if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then 235 if [ ! -f ./sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx ]; then
208 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 236 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2
@@ -229,3 +257,4 @@ testAudioTagging @@ -229,3 +257,4 @@ testAudioTagging
229 testSpokenLanguageIdentification 257 testSpokenLanguageIdentification
230 testOfflineAsr 258 testOfflineAsr
231 testPunctuation 259 testPunctuation
  260 +testInverseTextNormalizationAsr
  1 +package com.k2fsa.sherpa.onnx
  2 +
  3 +fun main() {
  4 + test()
  5 +}
  6 +// Decodes ./itn-zh-number.wav with the recognizer from createOfflineRecognizer() and prints the result.
  7 +fun test() {
  8 + val recognizer = createOfflineRecognizer()
  9 + val waveFilename = "./itn-zh-number.wav";
  10 +// readWaveFromFile returns an array: element 0 is cast to the samples (FloatArray), element 1 to the sample rate (Int).
  11 + val objArray = WaveReader.readWaveFromFile(
  12 + filename = waveFilename,
  13 + )
  14 + val samples: FloatArray = objArray[0] as FloatArray
  15 + val sampleRate: Int = objArray[1] as Int
  16 +// Feed the whole waveform into one stream, then decode it.
  17 + val stream = recognizer.createStream()
  18 + stream.acceptWaveform(samples, sampleRate=sampleRate)
  19 + recognizer.decode(stream)
  20 +
  21 + val result = recognizer.getResult(stream)
  22 + println(result)
  23 +// Explicit release of both objects; NOTE(review): presumably frees native handles - confirm against the API.
  24 + stream.release()
  25 + recognizer.release()
  26 +}
  27 +// Builds an OfflineRecognizer whose config passes ./itn_zh_number.fst as ruleFsts (inverse text normalization rules).
  28 +fun createOfflineRecognizer(): OfflineRecognizer {
  29 + val config = OfflineRecognizerConfig(
  30 + featConfig = getFeatureConfig(sampleRate = 16000, featureDim = 80),
  31 + modelConfig = getOfflineModelConfig(0)!!, // !! throws if null; NOTE(review): 0 presumably selects the paraformer zh model - confirm
  32 + ruleFsts = "./itn_zh_number.fst",
  33 + )
  34 +
  35 + return OfflineRecognizer(config = config)
  36 +}
  37 +
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +const sherpa_onnx = require('sherpa-onnx-node');
  3 +
  4 +// Please download test files from
  5 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
  6 +const config = {
  7 + 'featConfig': {
  8 + 'sampleRate': 16000,
  9 + 'featureDim': 80,
  10 + },
  11 + 'modelConfig': {
  12 + 'paraformer': {
  13 + 'model': './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx',
  14 + },
  15 + 'tokens': './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
  16 + 'numThreads': 2,
  17 + 'provider': 'cpu',
  18 + 'debug': 1,
  19 + },
  20 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  21 + ruleFsts: './itn_zh_number.fst',
  22 +};
  23 +
  24 +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  25 +const waveFilename = './itn-zh-number.wav';
  26 +
  27 +const recognizer = new sherpa_onnx.OfflineRecognizer(config);
  28 +console.log('Started')
  29 +let start = Date.now();
  30 +const stream = recognizer.createStream();
  31 +const wave = sherpa_onnx.readWave(waveFilename);
  32 +stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
  33 +
  34 +recognizer.decode(stream);
  35 +result = recognizer.getResult(stream)
  36 +let stop = Date.now();
  37 +console.log('Done')
  38 +
  39 +const elapsed_seconds = (stop - start) / 1000;
  40 +const duration = wave.samples.length / wave.sampleRate;
  41 +const real_time_factor = elapsed_seconds / duration;
  42 +console.log('Wave duration', duration.toFixed(3), 'secodns')
  43 +console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
  44 +console.log(
  45 + `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
  46 + real_time_factor.toFixed(3))
  47 +console.log(waveFilename)
  48 +console.log('result\n', result)
  1 +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +
  3 +const fs = require('fs');
  4 +const {Readable} = require('stream');
  5 +const wav = require('wav');
  6 +
  7 +const sherpa_onnx = require('sherpa-onnx');
  8 +
  9 +function createOfflineRecognizer() {
  10 + let featConfig = {
  11 + sampleRate: 16000,
  12 + featureDim: 80,
  13 + };
  14 +
  15 + let modelConfig = {
  16 + transducer: {
  17 + encoder: '',
  18 + decoder: '',
  19 + joiner: '',
  20 + },
  21 + paraformer: {
  22 + model: './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx',
  23 + },
  24 + nemoCtc: {
  25 + model: '',
  26 + },
  27 + whisper: {
  28 + encoder: '',
  29 + decoder: '',
  30 + language: '',
  31 + task: '',
  32 + tailPaddings: -1,
  33 + },
  34 + tdnn: {
  35 + model: '',
  36 + },
  37 + tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
  38 + numThreads: 1,
  39 + debug: 0,
  40 + provider: 'cpu',
  41 + modelType: 'paraformer',
  42 + };
  43 +
  44 + let lmConfig = {
  45 + model: '',
  46 + scale: 1.0,
  47 + };
  48 +
  49 + let config = {
  50 + featConfig: featConfig,
  51 + modelConfig: modelConfig,
  52 + lmConfig: lmConfig,
  53 + decodingMethod: 'greedy_search',
  54 + maxActivePaths: 4,
  55 + hotwordsFile: '',
  56 + hotwordsScore: 1.5,
  57 + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
  58 + ruleFsts: './itn_zh_number.fst',
  59 + };
  60 +
  61 + return sherpa_onnx.createOfflineRecognizer(config);
  62 +}
  63 +
  64 +
  65 +const recognizer = createOfflineRecognizer();
  66 +const stream = recognizer.createStream();
  67 +
  68 +// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn-zh-number.wav
  69 +const waveFilename = './itn-zh-number.wav';
  70 +
  71 +const reader = new wav.Reader();
  72 +const readable = new Readable().wrap(reader);
  73 +const buf = [];
  74 +
  75 +reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  76 + if (sampleRate != recognizer.config.featConfig.sampleRate) {
  77 + throw new Error(`Only support sampleRate ${
  78 + recognizer.config.featConfig.sampleRate}. Given ${sampleRate}`);
  79 + }
  80 +
  81 + if (audioFormat != 1) {
  82 + throw new Error(`Only support PCM format. Given ${audioFormat}`);
  83 + }
  84 +
  85 + if (channels != 1) {
  86 + throw new Error(`Only a single channel. Given ${channel}`);
  87 + }
  88 +
  89 + if (bitDepth != 16) {
  90 + throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  91 + }
  92 +});
  93 +
  94 +fs.createReadStream(waveFilename, {'highWaterMark': 4096})
  95 + .pipe(reader)
  96 + .on('finish', function(err) {
  97 + // tail padding
  98 + const floatSamples =
  99 + new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
  100 +
  101 + buf.push(floatSamples);
  102 + const flattened =
  103 + Float32Array.from(buf.reduce((a, b) => [...a, ...b], []));
  104 +
  105 + stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
  106 + recognizer.decode(stream);
  107 + const text = recognizer.getResult(stream).text;
  108 + console.log(text);
  109 +
  110 + stream.free();
  111 + recognizer.free();
  112 + });
  113 +
  114 +readable.on('readable', function() {
  115 + let chunk;
  116 + while ((chunk = readable.read()) != null) {
  117 + const int16Samples = new Int16Array(
  118 + chunk.buffer, chunk.byteOffset,
  119 + chunk.length / Int16Array.BYTES_PER_ELEMENT);
  120 +
  121 + const floatSamples = new Float32Array(int16Samples.length);
  122 + for (let i = 0; i < floatSamples.length; i++) {
  123 + floatSamples[i] = int16Samples[i] / 32768.0;
  124 + }
  125 +
  126 + buf.push(floatSamples);
  127 + }
  128 +});
@@ -21,7 +21,8 @@ namespace SherpaOnnx @@ -21,7 +21,8 @@ namespace SherpaOnnx
21 MaxActivePaths = 4; 21 MaxActivePaths = 4;
22 HotwordsFile = ""; 22 HotwordsFile = "";
23 HotwordsScore = 1.5F; 23 HotwordsScore = 1.5F;
24 - 24 + RuleFsts = "";
  25 + RuleFars = "";
25 } 26 }
26 public FeatureConfig FeatConfig; 27 public FeatureConfig FeatConfig;
27 public OfflineModelConfig ModelConfig; 28 public OfflineModelConfig ModelConfig;
@@ -36,5 +37,11 @@ namespace SherpaOnnx @@ -36,5 +37,11 @@ namespace SherpaOnnx
36 public string HotwordsFile; 37 public string HotwordsFile;
37 38
38 public float HotwordsScore; 39 public float HotwordsScore;
  40 +
  41 + [MarshalAs(UnmanagedType.LPStr)]
  42 + public string RuleFsts;
  43 +
  44 + [MarshalAs(UnmanagedType.LPStr)]
  45 + public string RuleFars;
39 } 46 }
40 } 47 }
  1 +../../../../go-api-examples/non-streaming-decode-files/run-paraformer-itn.sh
@@ -397,6 +397,10 @@ type OfflineRecognizerConfig struct { @@ -397,6 +397,10 @@ type OfflineRecognizerConfig struct {
397 397
398 // Used only when DecodingMethod is modified_beam_search. 398 // Used only when DecodingMethod is modified_beam_search.
399 MaxActivePaths int 399 MaxActivePaths int
  400 + HotwordsFile string
  401 + HotwordsScore float32
  402 + RuleFsts string
  403 + RuleFars string
400 } 404 }
401 405
402 // It wraps a pointer from C 406 // It wraps a pointer from C
@@ -491,6 +495,17 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer { @@ -491,6 +495,17 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
491 495
492 c.max_active_paths = C.int(config.MaxActivePaths) 496 c.max_active_paths = C.int(config.MaxActivePaths)
493 497
  498 + c.hotwords_file = C.CString(config.HotwordsFile)
  499 + defer C.free(unsafe.Pointer(c.hotwords_file))
  500 +
  501 + c.hotwords_score = C.float(config.HotwordsScore)
  502 +
  503 + c.rule_fsts = C.CString(config.RuleFsts)
  504 + defer C.free(unsafe.Pointer(c.rule_fsts))
  505 +
  506 + c.rule_fars = C.CString(config.RuleFars)
  507 + defer C.free(unsafe.Pointer(c.rule_fars))
  508 +
494 recognizer := &OfflineRecognizer{} 509 recognizer := &OfflineRecognizer{}
495 recognizer.impl = C.CreateOfflineRecognizer(&c) 510 recognizer.impl = C.CreateOfflineRecognizer(&c)
496 511
@@ -15,8 +15,8 @@ cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON .. @@ -15,8 +15,8 @@ cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
15 make -j install 15 make -j install
16 export PKG_CONFIG_PATH=$PWD/install:$PKG_CONFIG_PATH 16 export PKG_CONFIG_PATH=$PWD/install:$PKG_CONFIG_PATH
17 cd ../scripts/node-addon-api/ 17 cd ../scripts/node-addon-api/
18 -  
19 -./node_modules/.bin/node-gyp build --verbose 18 +npm i
  19 +./node_modules/.bin/cmake-js compile --log-level verbose
20 20
21 # see test/test_asr_streaming_transducer.js 21 # see test/test_asr_streaming_transducer.js
22 # for usages 22 # for usages
@@ -180,6 +180,8 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { @@ -180,6 +180,8 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
180 SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths); 180 SHERPA_ONNX_ASSIGN_ATTR_INT32(max_active_paths, maxActivePaths);
181 SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile); 181 SHERPA_ONNX_ASSIGN_ATTR_STR(hotwords_file, hotwordsFile);
182 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore); 182 SHERPA_ONNX_ASSIGN_ATTR_FLOAT(hotwords_score, hotwordsScore);
  183 + SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
  184 + SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
183 185
184 SherpaOnnxOfflineRecognizer *recognizer = CreateOfflineRecognizer(&c); 186 SherpaOnnxOfflineRecognizer *recognizer = CreateOfflineRecognizer(&c);
185 187
@@ -259,6 +261,14 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) { @@ -259,6 +261,14 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
259 delete[] c.hotwords_file; 261 delete[] c.hotwords_file;
260 } 262 }
261 263
  264 + if (c.rule_fsts) {
  265 + delete[] c.rule_fsts;
  266 + }
  267 +
  268 + if (c.rule_fars) {
  269 + delete[] c.rule_fars;
  270 + }
  271 +
262 if (!recognizer) { 272 if (!recognizer) {
263 Napi::TypeError::New(env, "Please check your config!") 273 Napi::TypeError::New(env, "Please check your config!")
264 .ThrowAsJavaScriptException(); 274 .ThrowAsJavaScriptException();
@@ -44,7 +44,7 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig( @@ -44,7 +44,7 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig(
44 44
45 c.vits = GetOfflineTtsVitsModelConfig(o); 45 c.vits = GetOfflineTtsVitsModelConfig(o);
46 46
47 - SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, num_threads); 47 + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
48 48
49 if (o.Has("debug") && 49 if (o.Has("debug") &&
50 (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) { 50 (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
@@ -388,6 +388,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( @@ -388,6 +388,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
388 recognizer_config.hotwords_score = 388 recognizer_config.hotwords_score =
389 SHERPA_ONNX_OR(config->hotwords_score, 1.5); 389 SHERPA_ONNX_OR(config->hotwords_score, 1.5);
390 390
  391 + recognizer_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
  392 + recognizer_config.rule_fars = SHERPA_ONNX_OR(config->rule_fars, "");
  393 +
391 if (config->model_config.debug) { 394 if (config->model_config.debug) {
392 SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); 395 SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
393 } 396 }
@@ -411,6 +411,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { @@ -411,6 +411,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig {
411 411
412 /// Bonus score for each token in hotwords. 412 /// Bonus score for each token in hotwords.
413 float hotwords_score; 413 float hotwords_score;
  414 + const char *rule_fsts;
  415 + const char *rule_fars;
414 } SherpaOnnxOfflineRecognizerConfig; 416 } SherpaOnnxOfflineRecognizerConfig;
415 417
416 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer 418 SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer
@@ -137,11 +137,13 @@ class OfflineRecognizerConfig { @@ -137,11 +137,13 @@ class OfflineRecognizerConfig {
137 this.maxActivePaths = 4, 137 this.maxActivePaths = 4,
138 this.hotwordsFile = '', 138 this.hotwordsFile = '',
139 this.hotwordsScore = 1.5, 139 this.hotwordsScore = 1.5,
  140 + this.ruleFsts = '',
  141 + this.ruleFars = '',
140 }); 142 });
141 143
142 @override 144 @override
143 String toString() { 145 String toString() {
144 - return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore)'; 146 + return 'OfflineRecognizerConfig(feat: $feat, model: $model, lm: $lm, decodingMethod: $decodingMethod, maxActivePaths: $maxActivePaths, hotwordsFile: $hotwordsFile, hotwordsScore: $hotwordsScore, ruleFsts: $ruleFsts, ruleFars: $ruleFars)';
145 } 147 }
146 148
147 final FeatureConfig feat; 149 final FeatureConfig feat;
@@ -154,6 +156,9 @@ class OfflineRecognizerConfig { @@ -154,6 +156,9 @@ class OfflineRecognizerConfig {
154 final String hotwordsFile; 156 final String hotwordsFile;
155 157
156 final double hotwordsScore; 158 final double hotwordsScore;
  159 +
  160 + final String ruleFsts;
  161 + final String ruleFars;
157 } 162 }
158 163
159 class OfflineRecognizerResult { 164 class OfflineRecognizerResult {
@@ -232,8 +237,13 @@ class OfflineRecognizer { @@ -232,8 +237,13 @@ class OfflineRecognizer {
232 c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8(); 237 c.ref.hotwordsFile = config.hotwordsFile.toNativeUtf8();
233 c.ref.hotwordsScore = config.hotwordsScore; 238 c.ref.hotwordsScore = config.hotwordsScore;
234 239
  240 + c.ref.ruleFsts = config.ruleFsts.toNativeUtf8();
  241 + c.ref.ruleFars = config.ruleFars.toNativeUtf8();
  242 +
235 final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr; 243 final ptr = SherpaOnnxBindings.createOfflineRecognizer?.call(c) ?? nullptr;
236 244
  245 + calloc.free(c.ref.ruleFars);
  246 + calloc.free(c.ref.ruleFsts);
237 calloc.free(c.ref.hotwordsFile); 247 calloc.free(c.ref.hotwordsFile);
238 calloc.free(c.ref.decodingMethod); 248 calloc.free(c.ref.decodingMethod);
239 calloc.free(c.ref.lm.model); 249 calloc.free(c.ref.lm.model);
@@ -130,6 +130,9 @@ final class SherpaOnnxOfflineRecognizerConfig extends Struct { @@ -130,6 +130,9 @@ final class SherpaOnnxOfflineRecognizerConfig extends Struct {
130 130
131 @Float() 131 @Float()
132 external double hotwordsScore; 132 external double hotwordsScore;
  133 +
  134 + external Pointer<Utf8> ruleFsts;
  135 + external Pointer<Utf8> ruleFars;
133 } 136 }
134 137
135 final class SherpaOnnxOnlineTransducerModelConfig extends Struct { 138 final class SherpaOnnxOnlineTransducerModelConfig extends Struct {
@@ -9,6 +9,8 @@ public class OfflineRecognizerConfig { @@ -9,6 +9,8 @@ public class OfflineRecognizerConfig {
9 private final int maxActivePaths; 9 private final int maxActivePaths;
10 private final String hotwordsFile; 10 private final String hotwordsFile;
11 private final float hotwordsScore; 11 private final float hotwordsScore;
  12 + private final String ruleFsts;
  13 + private final String ruleFars;
12 14
13 private OfflineRecognizerConfig(Builder builder) { 15 private OfflineRecognizerConfig(Builder builder) {
14 this.featConfig = builder.featConfig; 16 this.featConfig = builder.featConfig;
@@ -17,6 +19,8 @@ public class OfflineRecognizerConfig { @@ -17,6 +19,8 @@ public class OfflineRecognizerConfig {
17 this.maxActivePaths = builder.maxActivePaths; 19 this.maxActivePaths = builder.maxActivePaths;
18 this.hotwordsFile = builder.hotwordsFile; 20 this.hotwordsFile = builder.hotwordsFile;
19 this.hotwordsScore = builder.hotwordsScore; 21 this.hotwordsScore = builder.hotwordsScore;
  22 + this.ruleFsts = builder.ruleFsts;
  23 + this.ruleFars = builder.ruleFars;
20 } 24 }
21 25
22 public static Builder builder() { 26 public static Builder builder() {
@@ -34,6 +38,8 @@ public class OfflineRecognizerConfig { @@ -34,6 +38,8 @@ public class OfflineRecognizerConfig {
34 private int maxActivePaths = 4; 38 private int maxActivePaths = 4;
35 private String hotwordsFile = ""; 39 private String hotwordsFile = "";
36 private float hotwordsScore = 1.5f; 40 private float hotwordsScore = 1.5f;
  41 + private String ruleFsts = "";
  42 + private String ruleFars = "";
37 43
38 public OfflineRecognizerConfig build() { 44 public OfflineRecognizerConfig build() {
39 return new OfflineRecognizerConfig(this); 45 return new OfflineRecognizerConfig(this);
@@ -68,5 +74,15 @@ public class OfflineRecognizerConfig { @@ -68,5 +74,15 @@ public class OfflineRecognizerConfig {
68 this.hotwordsScore = hotwordsScore; 74 this.hotwordsScore = hotwordsScore;
69 return this; 75 return this;
70 } 76 }
  77 +
  78 + public Builder setRuleFsts(String ruleFsts) {
  79 + this.ruleFsts = ruleFsts;
  80 + return this;
  81 + }
  82 +
  83 + public Builder setRuleFars(String ruleFars) {
  84 + this.ruleFars = ruleFars;
  85 + return this;
  86 + }
71 } 87 }
72 } 88 }
@@ -34,6 +34,18 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) { @@ -34,6 +34,18 @@ static OfflineRecognizerConfig GetOfflineConfig(JNIEnv *env, jobject config) {
34 fid = env->GetFieldID(cls, "hotwordsScore", "F"); 34 fid = env->GetFieldID(cls, "hotwordsScore", "F");
35 ans.hotwords_score = env->GetFloatField(config, fid); 35 ans.hotwords_score = env->GetFloatField(config, fid);
36 36
  37 + fid = env->GetFieldID(cls, "ruleFsts", "Ljava/lang/String;");
  38 + s = (jstring)env->GetObjectField(config, fid);
  39 + p = env->GetStringUTFChars(s, nullptr);
  40 + ans.rule_fsts = p;
  41 + env->ReleaseStringUTFChars(s, p);
  42 +
  43 + fid = env->GetFieldID(cls, "ruleFars", "Ljava/lang/String;");
  44 + s = (jstring)env->GetObjectField(config, fid);
  45 + p = env->GetStringUTFChars(s, nullptr);
  46 + ans.rule_fars = p;
  47 + env->ReleaseStringUTFChars(s, p);
  48 +
37 //---------- feat config ---------- 49 //---------- feat config ----------
38 fid = env->GetFieldID(cls, "featConfig", 50 fid = env->GetFieldID(cls, "featConfig",
39 "Lcom/k2fsa/sherpa/onnx/FeatureConfig;"); 51 "Lcom/k2fsa/sherpa/onnx/FeatureConfig;");
@@ -53,6 +53,8 @@ data class OfflineRecognizerConfig( @@ -53,6 +53,8 @@ data class OfflineRecognizerConfig(
53 var maxActivePaths: Int = 4, 53 var maxActivePaths: Int = 4,
54 var hotwordsFile: String = "", 54 var hotwordsFile: String = "",
55 var hotwordsScore: Float = 1.5f, 55 var hotwordsScore: Float = 1.5f,
  56 + var ruleFsts: String = "",
  57 + var ruleFars: String = "",
56 ) 58 )
57 59
58 class OfflineRecognizer( 60 class OfflineRecognizer(
@@ -387,7 +387,9 @@ func sherpaOnnxOfflineRecognizerConfig( @@ -387,7 +387,9 @@ func sherpaOnnxOfflineRecognizerConfig(
387 decodingMethod: String = "greedy_search", 387 decodingMethod: String = "greedy_search",
388 maxActivePaths: Int = 4, 388 maxActivePaths: Int = 4,
389 hotwordsFile: String = "", 389 hotwordsFile: String = "",
390 - hotwordsScore: Float = 1.5 390 + hotwordsScore: Float = 1.5,
  391 + ruleFsts: String = "",
  392 + ruleFars: String = ""
391 ) -> SherpaOnnxOfflineRecognizerConfig { 393 ) -> SherpaOnnxOfflineRecognizerConfig {
392 return SherpaOnnxOfflineRecognizerConfig( 394 return SherpaOnnxOfflineRecognizerConfig(
393 feat_config: featConfig, 395 feat_config: featConfig,
@@ -396,7 +398,9 @@ func sherpaOnnxOfflineRecognizerConfig( @@ -396,7 +398,9 @@ func sherpaOnnxOfflineRecognizerConfig(
396 decoding_method: toCPointer(decodingMethod), 398 decoding_method: toCPointer(decodingMethod),
397 max_active_paths: Int32(maxActivePaths), 399 max_active_paths: Int32(maxActivePaths),
398 hotwords_file: toCPointer(hotwordsFile), 400 hotwords_file: toCPointer(hotwordsFile),
399 - hotwords_score: hotwordsScore 401 + hotwords_score: hotwordsScore,
  402 + rule_fsts: toCPointer(ruleFsts),
  403 + rule_fars: toCPointer(ruleFars)
400 ) 404 )
401 } 405 }
402 406
@@ -628,7 +628,7 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { @@ -628,7 +628,7 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
628 const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module); 628 const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
629 const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module); 629 const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
630 630
631 - const len = feat.len + model.len + lm.len + 4 * 4; 631 + const len = feat.len + model.len + lm.len + 6 * 4;
632 const ptr = Module._malloc(len); 632 const ptr = Module._malloc(len);
633 633
634 let offset = 0; 634 let offset = 0;
@@ -643,7 +643,10 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { @@ -643,7 +643,10 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
643 643
644 const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1; 644 const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
645 const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1; 645 const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
646 - const bufferLen = decodingMethodLen + hotwordsFileLen; 646 + const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
  647 + const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
  648 + const bufferLen =
  649 + decodingMethodLen + hotwordsFileLen + ruleFstsLen + ruleFarsLen;
647 const buffer = Module._malloc(bufferLen); 650 const buffer = Module._malloc(bufferLen);
648 651
649 offset = 0; 652 offset = 0;
@@ -651,6 +654,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { @@ -651,6 +654,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
651 offset += decodingMethodLen; 654 offset += decodingMethodLen;
652 655
653 Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen); 656 Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
  657 + offset += hotwordsFileLen;
  658 +
  659 + Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
  660 + offset += ruleFstsLen;
  661 +
  662 + Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsLen);
  663 + offset += ruleFarsLen;
654 664
655 offset = feat.len + model.len + lm.len; 665 offset = feat.len + model.len + lm.len;
656 666
@@ -666,6 +676,15 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) { @@ -666,6 +676,15 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
666 Module.setValue(ptr + offset, config.hotwordsScore, 'float'); 676 Module.setValue(ptr + offset, config.hotwordsScore, 'float');
667 offset += 4; 677 offset += 4;
668 678
  679 + Module.setValue(
  680 + ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
  681 + offset += 4;
  682 +
  683 + Module.setValue(
  684 + ptr + offset, buffer + decodingMethodLen + hotwordsFileLen + ruleFstsLen,
  685 + 'i8*');
  686 + offset += 4;
  687 +
669 return { 688 return {
670 buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm 689 buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
671 } 690 }
@@ -29,7 +29,7 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); @@ -29,7 +29,7 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
29 static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) == 29 static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
30 sizeof(SherpaOnnxFeatureConfig) + 30 sizeof(SherpaOnnxFeatureConfig) +
31 sizeof(SherpaOnnxOfflineLMConfig) + 31 sizeof(SherpaOnnxOfflineLMConfig) +
32 - sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4, 32 + sizeof(SherpaOnnxOfflineModelConfig) + 6 * 4,
33 ""); 33 "");
34 34
35 void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) { 35 void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
@@ -103,6 +103,8 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) { @@ -103,6 +103,8 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
103 fprintf(stdout, "max active paths: %d\n", config->max_active_paths); 103 fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
104 fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); 104 fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
105 fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score); 105 fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
  106 + fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
  107 + fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
106 } 108 }
107 109
108 void CopyHeap(const char *src, int32_t num_bytes, char *dst) { 110 void CopyHeap(const char *src, int32_t num_bytes, char *dst) {