Committed by
GitHub
Add Go API for homophone replacer (#2168)
正在显示
8 个修改的文件
包含
157 行增加
和
48 行删除
| @@ -142,32 +142,31 @@ jobs: | @@ -142,32 +142,31 @@ jobs: | ||
| 142 | name: ${{ matrix.os }}-libs | 142 | name: ${{ matrix.os }}-libs |
| 143 | path: to-upload/ | 143 | path: to-upload/ |
| 144 | 144 | ||
| 145 | - - name: Test speech enhancement (GTCRN) | 145 | + - name: Test streaming decoding files |
| 146 | shell: bash | 146 | shell: bash |
| 147 | run: | | 147 | run: | |
| 148 | - cd scripts/go/_internal/speech-enhancement-gtcrn/ | ||
| 149 | - | ||
| 150 | - ./run.sh | ||
| 151 | - | 148 | + cd scripts/go/_internal/streaming-decode-files |
| 152 | ls -lh | 149 | ls -lh |
| 153 | - | ||
| 154 | - - name: Test audio tagging | ||
| 155 | - shell: bash | ||
| 156 | - run: | | ||
| 157 | - cd scripts/go/_internal/audio-tagging/ | ||
| 158 | - | ||
| 159 | - ./run.sh | ||
| 160 | - | 150 | + go mod tidy |
| 151 | + cat go.mod | ||
| 152 | + go build | ||
| 161 | ls -lh | 153 | ls -lh |
| 162 | 154 | ||
| 163 | - - name: Test Keyword spotting | ||
| 164 | - shell: bash | ||
| 165 | - run: | | ||
| 166 | - cd scripts/go/_internal/keyword-spotting-from-file/ | 155 | + echo "Test zipformer2 CTC" |
| 156 | + ./run-zipformer2-ctc-with-hr.sh | ||
| 157 | + ./run-zipformer2-ctc.sh | ||
| 158 | + rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 | ||
| 167 | 159 | ||
| 168 | - ./run.sh | 160 | + echo "Test transducer" |
| 161 | + ./run-transducer.sh | ||
| 162 | + rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 | ||
| 169 | 163 | ||
| 170 | - ls -lh | 164 | + ./run-transducer-itn.sh |
| 165 | + rm -rf sherpa-onnx-streaming-* | ||
| 166 | + | ||
| 167 | + echo "Test paraformer" | ||
| 168 | + ./run-paraformer.sh | ||
| 169 | + rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en | ||
| 171 | 170 | ||
| 172 | - name: Test non-streaming decoding files | 171 | - name: Test non-streaming decoding files |
| 173 | shell: bash | 172 | shell: bash |
| @@ -179,6 +178,11 @@ jobs: | @@ -179,6 +178,11 @@ jobs: | ||
| 179 | go build | 178 | go build |
| 180 | ls -lh | 179 | ls -lh |
| 181 | 180 | ||
| 181 | + echo "Test SenseVoice ctc" | ||
| 182 | + ./run-sense-voice-small-with-hr.sh | ||
| 183 | + ./run-sense-voice-small.sh | ||
| 184 | + rm -rf sherpa-onnx-sense-* | ||
| 185 | + | ||
| 182 | echo "Test Dolphin CTC" | 186 | echo "Test Dolphin CTC" |
| 183 | ./run-dolphin-ctc-base.sh | 187 | ./run-dolphin-ctc-base.sh |
| 184 | rm -rf sherpa-onnx-dolphin-* | 188 | rm -rf sherpa-onnx-dolphin-* |
| @@ -191,10 +195,6 @@ jobs: | @@ -191,10 +195,6 @@ jobs: | ||
| 191 | ./run-moonshine.sh | 195 | ./run-moonshine.sh |
| 192 | rm -rf sherpa-onnx-* | 196 | rm -rf sherpa-onnx-* |
| 193 | 197 | ||
| 194 | - echo "Test SenseVoice ctc" | ||
| 195 | - ./run-sense-voice-small.sh | ||
| 196 | - rm -rf sherpa-onnx-sense-* | ||
| 197 | - | ||
| 198 | echo "Test telespeech ctc" | 198 | echo "Test telespeech ctc" |
| 199 | ./run-telespeech-ctc.sh | 199 | ./run-telespeech-ctc.sh |
| 200 | rm -rf sherpa-onnx-telespeech-ctc-* | 200 | rm -rf sherpa-onnx-telespeech-ctc-* |
| @@ -224,6 +224,33 @@ jobs: | @@ -224,6 +224,33 @@ jobs: | ||
| 224 | ./run-tdnn-yesno.sh | 224 | ./run-tdnn-yesno.sh |
| 225 | rm -rf sherpa-onnx-tdnn-yesno | 225 | rm -rf sherpa-onnx-tdnn-yesno |
| 226 | 226 | ||
| 227 | + - name: Test speech enhancement (GTCRN) | ||
| 228 | + shell: bash | ||
| 229 | + run: | | ||
| 230 | + cd scripts/go/_internal/speech-enhancement-gtcrn/ | ||
| 231 | + | ||
| 232 | + ./run.sh | ||
| 233 | + | ||
| 234 | + ls -lh | ||
| 235 | + | ||
| 236 | + - name: Test audio tagging | ||
| 237 | + shell: bash | ||
| 238 | + run: | | ||
| 239 | + cd scripts/go/_internal/audio-tagging/ | ||
| 240 | + | ||
| 241 | + ./run.sh | ||
| 242 | + | ||
| 243 | + ls -lh | ||
| 244 | + | ||
| 245 | + - name: Test Keyword spotting | ||
| 246 | + shell: bash | ||
| 247 | + run: | | ||
| 248 | + cd scripts/go/_internal/keyword-spotting-from-file/ | ||
| 249 | + | ||
| 250 | + ./run.sh | ||
| 251 | + | ||
| 252 | + ls -lh | ||
| 253 | + | ||
| 227 | - name: Test adding punctuation | 254 | - name: Test adding punctuation |
| 228 | shell: bash | 255 | shell: bash |
| 229 | run: | | 256 | run: | |
| @@ -301,28 +328,3 @@ jobs: | @@ -301,28 +328,3 @@ jobs: | ||
| 301 | with: | 328 | with: |
| 302 | name: tts-waves-${{ matrix.os }} | 329 | name: tts-waves-${{ matrix.os }} |
| 303 | path: tts-waves | 330 | path: tts-waves |
| 304 | - | ||
| 305 | - - name: Test streaming decoding files | ||
| 306 | - shell: bash | ||
| 307 | - run: | | ||
| 308 | - cd scripts/go/_internal/streaming-decode-files | ||
| 309 | - ls -lh | ||
| 310 | - go mod tidy | ||
| 311 | - cat go.mod | ||
| 312 | - go build | ||
| 313 | - ls -lh | ||
| 314 | - | ||
| 315 | - echo "Test zipformer2 CTC" | ||
| 316 | - ./run-zipformer2-ctc.sh | ||
| 317 | - rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 | ||
| 318 | - | ||
| 319 | - echo "Test transducer" | ||
| 320 | - ./run-transducer.sh | ||
| 321 | - rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26 | ||
| 322 | - | ||
| 323 | - ./run-transducer-itn.sh | ||
| 324 | - rm -rf sherpa-onnx-streaming-* | ||
| 325 | - | ||
| 326 | - echo "Test paraformer" | ||
| 327 | - ./run-paraformer.sh | ||
| 328 | - rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en |
| @@ -66,6 +66,10 @@ func main() { | @@ -66,6 +66,10 @@ func main() { | ||
| 66 | flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") | 66 | flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") |
| 67 | flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") | 67 | flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") |
| 68 | 68 | ||
| 69 | + flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homonphone replacer") | ||
| 70 | + flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homonphone replacer") | ||
| 71 | + flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homonphone replacer") | ||
| 72 | + | ||
| 69 | flag.Parse() | 73 | flag.Parse() |
| 70 | 74 | ||
| 71 | if len(flag.Args()) != 1 { | 75 | if len(flag.Args()) != 1 { |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 8 | + rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +if [ ! -d dict ]; then | ||
| 12 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 13 | + tar xf dict.tar.bz2 | ||
| 14 | + rm dict.tar.bz2 | ||
| 15 | + | ||
| 16 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 17 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 18 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 19 | +fi | ||
| 20 | + | ||
| 21 | +go mod tidy | ||
| 22 | +go build | ||
| 23 | + | ||
| 24 | +./non-streaming-decode-files \ | ||
| 25 | + --sense-voice-model ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx \ | ||
| 26 | + --tokens ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt \ | ||
| 27 | + --debug 1 \ | ||
| 28 | + --hr-dict-dir ./dict \ | ||
| 29 | + --hr-lexicon ./lexicon.txt \ | ||
| 30 | + --hr-rule-fsts ./replace.fst \ | ||
| 31 | + ./test-hr.wav |
| @@ -32,6 +32,9 @@ func main() { | @@ -32,6 +32,9 @@ func main() { | ||
| 32 | flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") | 32 | flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search") |
| 33 | flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") | 33 | flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization") |
| 34 | flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") | 34 | flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization") |
| 35 | + flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homonphone replacer") | ||
| 36 | + flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homonphone replacer") | ||
| 37 | + flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homonphone replacer") | ||
| 35 | 38 | ||
| 36 | flag.Parse() | 39 | flag.Parse() |
| 37 | 40 |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -d sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 8 | + rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +if [ ! -d dict ]; then | ||
| 12 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/dict.tar.bz2 | ||
| 13 | + tar xf dict.tar.bz2 | ||
| 14 | + rm dict.tar.bz2 | ||
| 15 | + | ||
| 16 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/replace.fst | ||
| 17 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/test-hr.wav | ||
| 18 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files/lexicon.txt | ||
| 19 | +fi | ||
| 20 | + | ||
| 21 | +go mod tidy | ||
| 22 | +go build | ||
| 23 | + | ||
| 24 | +./streaming-decode-files \ | ||
| 25 | + --zipformer2-ctc ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \ | ||
| 26 | + --tokens ./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt \ | ||
| 27 | + --hr-dict-dir ./dict \ | ||
| 28 | + --hr-lexicon ./lexicon.txt \ | ||
| 29 | + --hr-rule-fsts ./replace.fst \ | ||
| 30 | + ./test-hr.wav |
| 1 | +../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh |
| 1 | +../../../../go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh |
| @@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct { | @@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct { | ||
| 108 | MaxActive int | 108 | MaxActive int |
| 109 | } | 109 | } |
| 110 | 110 | ||
| 111 | +type HomophoneReplacerConfig struct { | ||
| 112 | + DictDir string | ||
| 113 | + Lexicon string | ||
| 114 | + RuleFsts string | ||
| 115 | +} | ||
| 116 | + | ||
| 111 | // Configuration for the online/streaming recognizer. | 117 | // Configuration for the online/streaming recognizer. |
| 112 | type OnlineRecognizerConfig struct { | 118 | type OnlineRecognizerConfig struct { |
| 113 | FeatConfig FeatureConfig | 119 | FeatConfig FeatureConfig |
| @@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct { | @@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct { | ||
| 137 | RuleFars string | 143 | RuleFars string |
| 138 | HotwordsBuf string | 144 | HotwordsBuf string |
| 139 | HotwordsBufSize int | 145 | HotwordsBufSize int |
| 146 | + Hr HomophoneReplacerConfig | ||
| 140 | } | 147 | } |
| 141 | 148 | ||
| 142 | // It contains the recognition result for an online stream. | 149 | // It contains the recognition result for an online stream. |
| @@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | @@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | ||
| 239 | defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph)) | 246 | defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph)) |
| 240 | c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive) | 247 | c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive) |
| 241 | 248 | ||
| 249 | + c.hr.dict_dir = C.CString(config.Hr.DictDir) | ||
| 250 | + defer C.free(unsafe.Pointer(c.hr.dict_dir)) | ||
| 251 | + | ||
| 252 | + c.hr.lexicon = C.CString(config.Hr.Lexicon) | ||
| 253 | + defer C.free(unsafe.Pointer(c.hr.lexicon)) | ||
| 254 | + | ||
| 255 | + c.hr.rule_fsts = C.CString(config.Hr.RuleFsts) | ||
| 256 | + defer C.free(unsafe.Pointer(c.hr.rule_fsts)) | ||
| 257 | + | ||
| 242 | impl := C.SherpaOnnxCreateOnlineRecognizer(&c) | 258 | impl := C.SherpaOnnxCreateOnlineRecognizer(&c) |
| 243 | if impl == nil { | 259 | if impl == nil { |
| 244 | return nil | 260 | return nil |
| @@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct { | @@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct { | ||
| 462 | BlankPenalty float32 | 478 | BlankPenalty float32 |
| 463 | RuleFsts string | 479 | RuleFsts string |
| 464 | RuleFars string | 480 | RuleFars string |
| 481 | + Hr HomophoneReplacerConfig | ||
| 465 | } | 482 | } |
| 466 | 483 | ||
| 467 | // It wraps a pointer from C | 484 | // It wraps a pointer from C |
| @@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher | @@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher | ||
| 549 | 566 | ||
| 550 | c.rule_fsts = C.CString(config.RuleFsts) | 567 | c.rule_fsts = C.CString(config.RuleFsts) |
| 551 | c.rule_fars = C.CString(config.RuleFars) | 568 | c.rule_fars = C.CString(config.RuleFars) |
| 569 | + | ||
| 570 | + c.hr.dict_dir = C.CString(config.Hr.DictDir) | ||
| 571 | + c.hr.lexicon = C.CString(config.Hr.Lexicon) | ||
| 572 | + c.hr.rule_fsts = C.CString(config.Hr.RuleFsts) | ||
| 552 | return &c | 573 | return &c |
| 553 | } | 574 | } |
| 554 | func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) { | 575 | func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) { |
| @@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) | @@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) | ||
| 676 | C.free(unsafe.Pointer(c.rule_fsts)) | 697 | C.free(unsafe.Pointer(c.rule_fsts)) |
| 677 | c.rule_fsts = nil | 698 | c.rule_fsts = nil |
| 678 | } | 699 | } |
| 700 | + | ||
| 679 | if c.rule_fars != nil { | 701 | if c.rule_fars != nil { |
| 680 | C.free(unsafe.Pointer(c.rule_fars)) | 702 | C.free(unsafe.Pointer(c.rule_fars)) |
| 681 | c.rule_fars = nil | 703 | c.rule_fars = nil |
| 682 | } | 704 | } |
| 705 | + | ||
| 706 | + if c.hr.dict_dir != nil { | ||
| 707 | + C.free(unsafe.Pointer(c.hr.dict_dir)) | ||
| 708 | + c.hr.dict_dir = nil | ||
| 709 | + } | ||
| 710 | + | ||
| 711 | + if c.hr.lexicon != nil { | ||
| 712 | + C.free(unsafe.Pointer(c.hr.lexicon)) | ||
| 713 | + c.hr.lexicon = nil | ||
| 714 | + } | ||
| 715 | + | ||
| 716 | + if c.hr.rule_fsts != nil { | ||
| 717 | + C.free(unsafe.Pointer(c.hr.rule_fsts)) | ||
| 718 | + c.hr.rule_fsts = nil | ||
| 719 | + } | ||
| 683 | } | 720 | } |
| 684 | 721 | ||
| 685 | // Frees the internal pointer of the recognition to avoid memory leak. | 722 | // Frees the internal pointer of the recognition to avoid memory leak. |
-
请 注册 或 登录 后发表评论