Fangjun Kuang
Committed by GitHub

Add Go API for homophone replacer (#2168)

... ... @@ -142,32 +142,31 @@ jobs:
name: ${{ matrix.os }}-libs
path: to-upload/
- name: Test speech enhancement (GTCRN)
- name: Test streaming decoding files
shell: bash
run: |
cd scripts/go/_internal/speech-enhancement-gtcrn/
./run.sh
cd scripts/go/_internal/streaming-decode-files
ls -lh
- name: Test audio tagging
shell: bash
run: |
cd scripts/go/_internal/audio-tagging/
./run.sh
go mod tidy
cat go.mod
go build
ls -lh
- name: Test Keyword spotting
shell: bash
run: |
cd scripts/go/_internal/keyword-spotting-from-file/
echo "Test zipformer2 CTC"
./run-zipformer2-ctc-with-hr.sh
./run-zipformer2-ctc.sh
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
./run.sh
echo "Test transducer"
./run-transducer.sh
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
ls -lh
./run-transducer-itn.sh
rm -rf sherpa-onnx-streaming-*
echo "Test paraformer"
./run-paraformer.sh
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
- name: Test non-streaming decoding files
shell: bash
... ... @@ -179,6 +178,11 @@ jobs:
go build
ls -lh
echo "Test SenseVoice ctc"
./run-sense-voice-small-with-hr.sh
./run-sense-voice-small.sh
rm -rf sherpa-onnx-sense-*
echo "Test Dolphin CTC"
./run-dolphin-ctc-base.sh
rm -rf sherpa-onnx-dolphin-*
... ... @@ -191,10 +195,6 @@ jobs:
./run-moonshine.sh
rm -rf sherpa-onnx-*
echo "Test SenseVoice ctc"
./run-sense-voice-small.sh
rm -rf sherpa-onnx-sense-*
echo "Test telespeech ctc"
./run-telespeech-ctc.sh
rm -rf sherpa-onnx-telespeech-ctc-*
... ... @@ -224,6 +224,33 @@ jobs:
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
- name: Test speech enhancement (GTCRN)
shell: bash
run: |
cd scripts/go/_internal/speech-enhancement-gtcrn/
./run.sh
ls -lh
- name: Test audio tagging
shell: bash
run: |
cd scripts/go/_internal/audio-tagging/
./run.sh
ls -lh
- name: Test Keyword spotting
shell: bash
run: |
cd scripts/go/_internal/keyword-spotting-from-file/
./run.sh
ls -lh
- name: Test adding punctuation
shell: bash
run: |
... ... @@ -301,28 +328,3 @@ jobs:
with:
name: tts-waves-${{ matrix.os }}
path: tts-waves
- name: Test streaming decoding files
shell: bash
run: |
cd scripts/go/_internal/streaming-decode-files
ls -lh
go mod tidy
cat go.mod
go build
ls -lh
echo "Test zipformer2 CTC"
./run-zipformer2-ctc.sh
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
echo "Test transducer"
./run-transducer.sh
rm -rf sherpa-onnx-streaming-zipformer-en-2023-06-26
./run-transducer-itn.sh
rm -rf sherpa-onnx-streaming-*
echo "Test paraformer"
./run-paraformer.sh
rm -rf sherpa-onnx-streaming-paraformer-bilingual-zh-en
... ...
... ... @@ -66,6 +66,10 @@ func main() {
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
// Optional homophone replacer inputs; each one is left unset when the flag is empty.
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
flag.Parse()
if len(flag.Args()) != 1 {
... ...
#!/usr/bin/env bash
#
# Builds the non-streaming-decode-files example and decodes test-hr.wav with
# the SenseVoice model while the homophone replacer is enabled.
#
# Every required asset is checked individually, so a previous run that was
# interrupted after extracting ./dict still fetches the remaining files.

set -ex

model_dir=sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17
hr_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files

# Download a single file from the hr-files release unless it already exists.
# The file is fetched under a temporary name and renamed only after curl
# succeeds, so a failed or partial download is retried on the next run.
# --fail makes curl return an error on HTTP failures instead of saving the
# server's error page as the asset.
fetch_hr_file() {
  local name=$1
  if [ ! -f "$name" ]; then
    curl -fSL -o "$name.tmp" "$hr_url/$name"
    mv "$name.tmp" "$name"
  fi
}

if [ ! -d "$model_dir" ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2
  tar xvf $model_dir.tar.bz2
  rm $model_dir.tar.bz2
fi

if [ ! -d dict ]; then
  curl -fSL -O $hr_url/dict.tar.bz2
  tar xf dict.tar.bz2
  rm dict.tar.bz2
fi

fetch_hr_file replace.fst
fetch_hr_file test-hr.wav
fetch_hr_file lexicon.txt

go mod tidy
go build

./non-streaming-decode-files \
  --sense-voice-model ./$model_dir/model.int8.onnx \
  --tokens ./$model_dir/tokens.txt \
  --debug 1 \
  --hr-dict-dir ./dict \
  --hr-lexicon ./lexicon.txt \
  --hr-rule-fsts ./replace.fst \
  ./test-hr.wav
... ...
... ... @@ -32,6 +32,9 @@ func main() {
flag.IntVar(&config.MaxActivePaths, "max-active-paths", 4, "Used only when --decoding-method is modified_beam_search")
flag.StringVar(&config.RuleFsts, "rule-fsts", "", "If not empty, path to rule fst for inverse text normalization")
flag.StringVar(&config.RuleFars, "rule-fars", "", "If not empty, path to rule fst archives for inverse text normalization")
// Optional homophone replacer inputs; each one is left unset when the flag is empty.
flag.StringVar(&config.Hr.DictDir, "hr-dict-dir", "", "If not empty, path to the jieba dict dir for homophone replacer")
flag.StringVar(&config.Hr.Lexicon, "hr-lexicon", "", "If not empty, path to the lexicon.txt for homophone replacer")
flag.StringVar(&config.Hr.RuleFsts, "hr-rule-fsts", "", "If not empty, path to the replace.fst for homophone replacer")
flag.Parse()
... ...
#!/usr/bin/env bash
#
# Builds the streaming-decode-files example and decodes test-hr.wav with the
# streaming zipformer2 CTC model while the homophone replacer is enabled.
#
# Every required asset is checked individually, so a previous run that was
# interrupted after extracting ./dict still fetches the remaining files.

set -ex

model_dir=sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
hr_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files

# Download a single file from the hr-files release unless it already exists.
# The file is fetched under a temporary name and renamed only after curl
# succeeds, so a failed or partial download is retried on the next run.
# --fail makes curl return an error on HTTP failures instead of saving the
# server's error page as the asset.
fetch_hr_file() {
  local name=$1
  if [ ! -f "$name" ]; then
    curl -fSL -o "$name.tmp" "$hr_url/$name"
    mv "$name.tmp" "$name"
  fi
}

if [ ! -d "$model_dir" ]; then
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_dir.tar.bz2
  tar xvf $model_dir.tar.bz2
  rm $model_dir.tar.bz2
fi

if [ ! -d dict ]; then
  curl -fSL -O $hr_url/dict.tar.bz2
  tar xf dict.tar.bz2
  rm dict.tar.bz2
fi

fetch_hr_file replace.fst
fetch_hr_file test-hr.wav
fetch_hr_file lexicon.txt

go mod tidy
go build

./streaming-decode-files \
  --zipformer2-ctc ./$model_dir/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
  --tokens ./$model_dir/tokens.txt \
  --hr-dict-dir ./dict \
  --hr-lexicon ./lexicon.txt \
  --hr-rule-fsts ./replace.fst \
  ./test-hr.wav
... ...
../../../../go-api-examples/non-streaming-decode-files/run-sense-voice-small-with-hr.sh
\ No newline at end of file
... ...
../../../../go-api-examples/streaming-decode-files/run-zipformer2-ctc-with-hr.sh
\ No newline at end of file
... ...
... ... @@ -108,6 +108,12 @@ type OnlineCtcFstDecoderConfig struct {
MaxActive int
}
// HomophoneReplacerConfig holds the file paths used to configure the
// homophone replacer of a recognizer. A field left empty is passed on as an
// empty string.
type HomophoneReplacerConfig struct {
	// DictDir is the path to the jieba dict directory.
	DictDir string

	// Lexicon is the path to the lexicon.txt file.
	Lexicon string

	// RuleFsts is the path to the replace.fst rule file.
	RuleFsts string
}
// Configuration for the online/streaming recognizer.
type OnlineRecognizerConfig struct {
FeatConfig FeatureConfig
... ... @@ -137,6 +143,7 @@ type OnlineRecognizerConfig struct {
RuleFars string
HotwordsBuf string
HotwordsBufSize int
Hr HomophoneReplacerConfig
}
// It contains the recognition result for a online stream.
... ... @@ -239,6 +246,15 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
c.hr.dict_dir = C.CString(config.Hr.DictDir)
defer C.free(unsafe.Pointer(c.hr.dict_dir))
c.hr.lexicon = C.CString(config.Hr.Lexicon)
defer C.free(unsafe.Pointer(c.hr.lexicon))
c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
defer C.free(unsafe.Pointer(c.hr.rule_fsts))
impl := C.SherpaOnnxCreateOnlineRecognizer(&c)
if impl == nil {
return nil
... ... @@ -462,6 +478,7 @@ type OfflineRecognizerConfig struct {
BlankPenalty float32
RuleFsts string
RuleFars string
Hr HomophoneReplacerConfig
}
// It wraps a pointer from C
... ... @@ -549,6 +566,10 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
c.rule_fsts = C.CString(config.RuleFsts)
c.rule_fars = C.CString(config.RuleFars)
c.hr.dict_dir = C.CString(config.Hr.DictDir)
c.hr.lexicon = C.CString(config.Hr.Lexicon)
c.hr.rule_fsts = C.CString(config.Hr.RuleFsts)
return &c
}
func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig) {
... ... @@ -676,10 +697,26 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
C.free(unsafe.Pointer(c.rule_fsts))
c.rule_fsts = nil
}
if c.rule_fars != nil {
C.free(unsafe.Pointer(c.rule_fars))
c.rule_fars = nil
}
if c.hr.dict_dir != nil {
C.free(unsafe.Pointer(c.hr.dict_dir))
c.hr.dict_dir = nil
}
if c.hr.lexicon != nil {
C.free(unsafe.Pointer(c.hr.lexicon))
c.hr.lexicon = nil
}
if c.hr.rule_fsts != nil {
C.free(unsafe.Pointer(c.hr.rule_fsts))
c.hr.rule_fsts = nil
}
}
// Frees the internal pointer of the recognition to avoid memory leak.
... ...