Fangjun Kuang
Committed by GitHub

Add Go API for homophone replacer (#2168)

@@ -142,32 +142,31 @@ jobs: @@ -142,32 +142,31 @@ jobs:
142 name: ${{ matrix.os }}-libs 142 name: ${{ matrix.os }}-libs
143 path: to-upload/ 143 path: to-upload/
144 144
145 - - name: Test speech enhancement (GTCRN) 145 + - name: Test streaming decoding files
146 shell: bash 146 shell: bash
147 run: | 147 run: |
148 - cd scripts/go/_internal/speech-enhancement-gtcrn/  
149 -  
150 - ./run.sh  
151 - 148 + cd scripts/go/_internal/streaming-decode-files
152 ls -lh 149 ls -lh
153 -  
154 - - name: Test audio tagging  
155 - shell: bash  
156 - run: |  
157 - cd scripts/go/_internal/audio-tagging/  
158 -  
159 - ./run.sh  
160 - 150 + go mod tidy
  151 + cat go.mod
  152 + go build
161 ls -lh 153 ls -lh
162 154
163 - - name: Test Keyword spotting  
164 - shell: bash  
165 - run: |  
166 - cd scripts/go/_internal/keyword-spotting-from-file/ 155 + echo "Test zipformer2 CTC"
  156 + ./run-zipformer2-ctc-with-hr.sh
  157 + ./run-zipformer2-ctc.sh
  158 + rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
167 159
168 - ./run.sh 160 + echo "Test transducer"
  161 + ./run-transducer.sh
  162 + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
169 163
170 - ls -lh 164 + ./run-transducer-itn.sh
  165 + rm -rf sherpa-onnx-streaming-*
  166 +
  167 + echo "Test paraformer"
  168 + ./run-paraformer.sh
  169 + rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
171 170
172 - name: Test non-streaming decoding files 171 - name: Test non-streaming decoding files
173 shell: bash 172 shell: bash
@@ -179,6 +178,11 @@ jobs: @@ -179,6 +178,11 @@ jobs:
179 go build 178 go build
180 ls -lh 179 ls -lh
181 180
  181 + echo "Test SenseVoice ctc"
  182 + ./run-sense-voice-small-with-hr.sh
  183 + ./run-sense-voice-small.sh
  184 + rm -rf sherpa-onnx-sense-*
  185 +
182 echo "Test Dolphin CTC" 186 echo "Test Dolphin CTC"
183 ./run-dolphin-ctc-base.sh 187 ./run-dolphin-ctc-base.sh
184 rm -rf sherpa-onnx-dolphin-* 188 rm -rf sherpa-onnx-dolphin-*
@@ -191,10 +195,6 @@ jobs: @@ -191,10 +195,6 @@ jobs:
191 ./run-moonshine.sh 195 ./run-moonshine.sh
192 rm -rf sherpa-onnx-* 196 rm -rf sherpa-onnx-*
193 197
194 - echo "Test SenseVoice ctc"  
195 - ./run-sense-voice-small.sh  
196 - rm -rf sherpa-onnx-sense-*  
197 -  
198 echo "Test telespeech ctc" 198 echo "Test telespeech ctc"
199 ./run-telespeech-ctc.sh 199 ./run-telespeech-ctc.sh
200 rm -rf sherpa-onnx-telespeech-ctc-* 200 rm -rf sherpa-onnx-telespeech-ctc-*
@@ -224,6 +224,33 @@ jobs: @@ -224,6 +224,33 @@ jobs:
224 ./run-tdnn-yesno.sh 224 ./run-tdnn-yesno.sh
225 rm -rf sherpa-onnx-tdnn-yesno 225 rm -rf sherpa-onnx-tdnn-yesno
226 226
  227 + - name: Test speech enhancement (GTCRN)
  228 + shell: bash
  229 + run: |
  230 + cd scripts/go/_internal/speech-enhancement-gtcrn/
  231 +
  232 + ./run.sh
  233 +
  234 + ls -lh
  235 +
  236 + - name: Test audio tagging
  237 + shell: bash
  238 + run: |
  239 + cd scripts/go/_internal/audio-tagging/
  240 +
  241 + ./run.sh
  242 +
  243 + ls -lh
  244 +
  245 + - name: Test Keyword spotting
  246 + shell: bash
  247 + run: |
  248 + cd scripts/go/_internal/keyword-spotting-from-file/
  249 +
  250 + ./run.sh
  251 +
  252 + ls -lh
  253 +
227 - name: Test adding punctuation 254 - name: Test adding punctuation
228 shell: bash 255 shell: bash
229 run: | 256 run: |
@@ -301,28 +328,3 @@ jobs: @@ -301,28 +328,3 @@ jobs:
301 with: 328 with:
302 name: tts-waves-${{ matrix.os }} 329 name: tts-waves-${{ matrix.os }}
303 path: tts-waves 330 path: tts-waves
304 -  
305 - - name: Test streaming decoding files  
306 - shell: bash  
307 - run: |  
308 - cd scripts/go/_internal/streaming-decode-files  
309 - ls -lh  
310 - go mod tidy  
311 - cat go.mod  
312 - go build  
313 - ls -lh  
314 -  
315 - echo "Test zipformer2 CTC"  
316 - ./run-zipformer2-ctc.sh  
317 - rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13  
318 -  
319 - echo "Test transducer"  
320 - ./run-transducer.sh  
321 - rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26  
322 -  
323 - ./run-transducer-itn.sh  
324 - rm -rf sherpa-onnx-streaming-*  
325 -  
326 - echo "Test paraformer"  
327 - ./run-paraformer.sh  
328 - rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en  
@@ -66,6 +66,10 @@ func main() { @@ -66,6 +66,10 @@ func main() {
66 flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") 66 flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
67 flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") 67 flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
68 68
  69 + flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
  70 + flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
  71 + flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
  72 +
69 flag.Parse() 73 flag.Parse()
70 74
71 if len(flag.Args()) != 1 { 75 if len(flag.Args()) != 1 {
  1 +#!/usr/bin/env bash
  2 + # Runs ./non-streaming-decode-files on test-hr.wav with the SenseVoice int8 model and the three --hr-* (homophone replacer) flags set.
  3 +set -ex # -e: stop at the first failing command; -x: echo each command as it runs
  4 +
  5 +if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then # download the model only when its directory is absent
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  7 + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
  8 + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 # the archive is removed once extracted
  9 +fi
  10 +
  11 +if [ ! -d dict ]; then # NOTE(review): only dict/ is tested; replace.fst, test-hr.wav and lexicon.txt below are skipped too whenever dict/ already exists
  12 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  13 + tar xf dict.tar.bz2
  14 + rm dict.tar.bz2
  15 + # The three files below are fetched into the current directory and are passed to the binary as ./replace.fst, ./test-hr.wav and ./lexicon.txt.
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  17 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  18 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  19 +fi
  20 +
  21 +go mod tidy
  22 +go build # presumably produces the ./non-streaming-decode-files binary invoked below (name taken from the directory) — confirm
  23 +
  24 +./non-streaming-decode-files \
  25 + --sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \
  26 + --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \
  27 + --debug 1 \
  28 + --hr-dict-dir ./dict \
  29 + --hr-lexicon ./lexicon.txt \
  30 + --hr-rule-fsts ./replace.fst \
  31 + ./test-hr.wav
@@ -32,6 +32,9 @@ func main() { @@ -32,6 +32,9 @@ func main() {
32 flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") 32 flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
33 flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") 33 flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
34 flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") 34 flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
  35 + flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
  36 + flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
  37 + flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
35 38
36 flag.Parse() 39 flag.Parse()
37 40
  1 +#!/usr/bin/env bash
  2 + # Runs ./streaming-decode-files on test-hr.wav with the streaming zipformer2 CTC model and the three --hr-* (homophone replacer) flags set.
  3 +set -ex # -e: stop at the first failing command; -x: echo each command as it runs
  4 +
  5 +if [ ! -d sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then # download the model only when its directory is absent
  6 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  7 + tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
  8 + rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 # the archive is removed once extracted
  9 +fi
  10 +
  11 +if [ ! -d dict ]; then # NOTE(review): only dict/ is tested; replace.fst, test-hr.wav and lexicon.txt below are skipped too whenever dict/ already exists
  12 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2
  13 + tar xf dict.tar.bz2
  14 + rm dict.tar.bz2
  15 + # The three files below are fetched into the current directory and are passed to the binary as ./replace.fst, ./test-hr.wav and ./lexicon.txt.
  16 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst
  17 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav
  18 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt
  19 +fi
  20 +
  21 +go mod tidy
  22 +go build # presumably produces the ./streaming-decode-files binary invoked below (name taken from the directory) — confirm
  23 +
  24 +./streaming-decode-files \
  25 + --zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
  26 + --tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \
  27 + --hr-dict-dir ./dict \
  28 + --hr-lexicon ./lexicon.txt \
  29 + --hr-rule-fsts ./replace.fst \
  30 + ./test-hr.wav
  1 +../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
  1 +../../../../go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
@@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct { @@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct {
108 MaxActive int 108 MaxActive int
109 } 109 }
110 110
  111 +type HomophoneReplacerConfig struct { // HomophoneReplacerConfig holds the file paths for the homophone replacer; it is embedded as the Hr field of the online and offline recognizer configs.
  112 + DictDir string // Path to the jieba dict directory (exposed as --hr-dict-dir by the example programs).
  113 + Lexicon string // Path to lexicon.txt (exposed as --hr-lexicon).
  114 + RuleFsts string // Path to replace.fst (exposed as --hr-rule-fsts). NOTE(review): plural name suggests several paths may be accepted — confirm against the C API.
  115 +}
  116 +
111 // Configuration for the online/streaming recognizer. 117 // Configuration for the online/streaming recognizer.
112 type OnlineRecognizerConfig struct { 118 type OnlineRecognizerConfig struct {
113 FeatConfig FeatureConfig 119 FeatConfig FeatureConfig
@@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct { @@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct {
137 RuleFars string 143 RuleFars string
138 HotwordsBuf string 144 HotwordsBuf string
139 HotwordsBufSize int 145 HotwordsBufSize int
  146 + Hr HomophoneReplacerConfig
140 } 147 }
141 148
142 // It contains the recognition result for a online stream. 149 // It contains the recognition result for a online stream.
@@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { @@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
239 defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph)) 246 defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
240 c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive) 247 c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
241 248
  249 + c.hr.dict_dir = C.CString(config.Hr.DictDir)
  250 + defer C.free(unsafe.Pointer(c.hr.dict_dir))
  251 +
  252 + c.hr.lexicon = C.CString(config.Hr.Lexicon)
  253 + defer C.free(unsafe.Pointer(c.hr.lexicon))
  254 +
  255 + c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
  256 + defer C.free(unsafe.Pointer(c.hr.rule_fsts))
  257 +
242 impl := C.SherpaOnnxCreateOnlineRecognizer(&c) 258 impl := C.SherpaOnnxCreateOnlineRecognizer(&c)
243 if impl == nil { 259 if impl == nil {
244 return nil 260 return nil
@@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct { @@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct {
462 BlankPenalty float32 478 BlankPenalty float32
463 RuleFsts string 479 RuleFsts string
464 RuleFars string 480 RuleFars string
  481 + Hr HomophoneReplacerConfig
465 } 482 }
466 483
467 // It wraps a pointer from C 484 // It wraps a pointer from C
@@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher @@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
549 566
550 c.rule_fsts = C.CString(config.RuleFsts) 567 c.rule_fsts = C.CString(config.RuleFsts)
551 c.rule_fars = C.CString(config.RuleFars) 568 c.rule_fars = C.CString(config.RuleFars)
  569 +
  570 + c.hr.dict_dir = C.CString(config.Hr.DictDir)
  571 + c.hr.lexicon = C.CString(config.Hr.Lexicon)
  572 + c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
552 return &c 573 return &c
553 } 574 }
554 func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) { 575 func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) {
@@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) @@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
676 C.free(unsafe.Pointer(c.rule_fsts)) 697 C.free(unsafe.Pointer(c.rule_fsts))
677 c.rule_fsts = nil 698 c.rule_fsts = nil
678 } 699 }
  700 +
679 if c.rule_fars != nil { 701 if c.rule_fars != nil {
680 C.free(unsafe.Pointer(c.rule_fars)) 702 C.free(unsafe.Pointer(c.rule_fars))
681 c.rule_fars = nil 703 c.rule_fars = nil
682 } 704 }
  705 +
  706 + if c.hr.dict_dir != nil {
  707 + C.free(unsafe.Pointer(c.hr.dict_dir))
  708 + c.hr.dict_dir = nil
  709 + }
  710 +
  711 + if c.hr.lexicon != nil {
  712 + C.free(unsafe.Pointer(c.hr.lexicon))
  713 + c.hr.lexicon = nil
  714 + }
  715 +
  716 + if c.hr.rule_fsts != nil {
  717 + C.free(unsafe.Pointer(c.hr.rule_fsts))
  718 + c.hr.rule_fsts = nil
  719 + }
683 } 720 }
684 721
685 // Frees the internal pointer of the recognition to avoid memory leak. 722 // Frees the internal pointer of the recognition to avoid memory leak.