继续操作前请注册或者登录。
Fangjun Kuang
Committed by GitHub

Support Portuguese and German ASR models from NeMo (#2394)

@@ -39,6 +39,7 @@ jobs: @@ -39,6 +39,7 @@ jobs:
39 shell: bash 39 shell: bash
40 run: | 40 run: |
41 cd scripts/nemo/fast-conformer-hybrid-transducer-ctc 41 cd scripts/nemo/fast-conformer-hybrid-transducer-ctc
  42 + ./run-ctc-non-streaming-2.sh
42 ./run-ctc-non-streaming.sh 43 ./run-ctc-non-streaming.sh
43 44
44 mv -v sherpa-onnx-nemo* ../../.. 45 mv -v sherpa-onnx-nemo* ../../..
@@ -66,6 +67,10 @@ jobs: @@ -66,6 +67,10 @@ jobs:
66 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8 67 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
67 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 68 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
68 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8 69 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
  70 + sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc
  71 + sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8
  72 + sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc
  73 + sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8
69 ) 74 )
70 75
71 for m in ${models[@]}; do 76 for m in ${models[@]}; do
@@ -75,7 +80,7 @@ jobs: @@ -75,7 +80,7 @@ jobs:
75 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface 80 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
76 cp -av $m/* huggingface 81 cp -av $m/* huggingface
77 cd huggingface 82 cd huggingface
78 - git lfs track "*.onnx" 83 + git lfs track "*.onnx" "*.wav"
79 git status 84 git status
80 git add . 85 git add .
81 git status 86 git status
@@ -99,6 +104,10 @@ jobs: @@ -99,6 +104,10 @@ jobs:
99 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8 104 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
100 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 105 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
101 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8 106 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
  107 + sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc
  108 + sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8
  109 + sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc
  110 + sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8
102 ) 111 )
103 for d in ${dirs[@]}; do 112 for d in ${dirs[@]}; do
104 tar cjvf ${d}.tar.bz2 ./$d 113 tar cjvf ${d}.tar.bz2 ./$d
@@ -39,6 +39,7 @@ jobs: @@ -39,6 +39,7 @@ jobs:
39 shell: bash 39 shell: bash
40 run: | 40 run: |
41 cd scripts/nemo/fast-conformer-hybrid-transducer-ctc 41 cd scripts/nemo/fast-conformer-hybrid-transducer-ctc
  42 + ./run-transducer-non-streaming-2.sh
42 ./run-transducer-non-streaming.sh 43 ./run-transducer-non-streaming.sh
43 44
44 mv -v sherpa-onnx-nemo* ../../.. 45 mv -v sherpa-onnx-nemo* ../../..
@@ -66,6 +67,10 @@ jobs: @@ -66,6 +67,10 @@ jobs:
66 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8 67 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
67 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 68 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
68 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8 69 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
  70 + sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc
  71 + sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8
  72 + sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc
  73 + sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8
69 ) 74 )
70 75
71 for m in ${models[@]}; do 76 for m in ${models[@]}; do
@@ -75,7 +80,7 @@ jobs: @@ -75,7 +80,7 @@ jobs:
75 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface 80 git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
76 cp -av $m/* huggingface 81 cp -av $m/* huggingface
77 cd huggingface 82 cd huggingface
78 - git lfs track "*.onnx" 83 + git lfs track "*.onnx" "*.wav"
79 git status 84 git status
80 git add . 85 git add .
81 git status 86 git status
@@ -98,6 +103,10 @@ jobs: @@ -98,6 +103,10 @@ jobs:
98 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8 103 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
99 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8 104 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
100 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8 105 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
  106 + sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc
  107 + sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8
  108 + sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc
  109 + sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8
101 ) 110 )
102 for d in ${dirs[@]}; do 111 for d in ${dirs[@]}; do
103 tar cjvf ${d}.tar.bz2 ./$d 112 tar cjvf ${d}.tar.bz2 ./$d
@@ -600,6 +600,70 @@ def get_models(): @@ -600,6 +600,70 @@ def get_models():
600 popd 600 popd
601 """, 601 """,
602 ), 602 ),
  603 + Model(
  604 + model_name="sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8",
  605 + idx=35,
  606 + lang="pt",
  607 + lang2="Portuguese",
  608 + short_name="stt_pt_fastconformer_hybrid_large_pc_transducer_int8",
  609 + cmd="""
  610 + pushd $model_name
  611 +
  612 + rm -rfv test_wavs
  613 +
  614 + ls -lh
  615 +
  616 + popd
  617 + """,
  618 + ),
  619 + Model(
  620 + model_name="sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8",
  621 + idx=36,
  622 + lang="pt",
  623 + lang2="Portuguese",
  624 + short_name="stt_pt_fastconformer_hybrid_large_pc_ctc-int8",
  625 + cmd="""
  626 + pushd $model_name
  627 +
  628 + rm -rfv test_wavs
  629 +
  630 + ls -lh
  631 +
  632 + popd
  633 + """,
  634 + ),
  635 + Model(
  636 + model_name="sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8",
  637 + idx=37,
  638 + lang="de",
  639 + lang2="German",
  640 + short_name="stt_de_fastconformer_hybrid_large_pc_transducer_int8",
  641 + cmd="""
  642 + pushd $model_name
  643 +
  644 + rm -rfv test_wavs
  645 +
  646 + ls -lh
  647 +
  648 + popd
  649 + """,
  650 + ),
  651 + Model(
  652 + model_name="sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8",
  653 + idx=38,
  654 + lang="de",
  655 + lang2="German",
  656 + short_name="stt_de_fastconformer_hybrid_large_pc_ctc-int8",
  657 + cmd="""
  658 + pushd $model_name
  659 +
  660 + rm -rfv test_wavs
  661 +
  662 + ls -lh
  663 +
  664 + popd
  665 + """,
  666 + ),
603 ] 667 ]
604 return models 668 return models
605 669
@@ -24,5 +24,7 @@ This folder contains scripts for exporting models from @@ -24,5 +24,7 @@ This folder contains scripts for exporting models from
24 24
25 - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m 25 - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m
26 - https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja 26 - https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja
  27 + - https://huggingface.co/nvidia/stt_pt_fastconformer_hybrid_large_pc
  28 + - https://huggingface.co/nvidia/stt_de_fastconformer_hybrid_large_pc
27 29
28 to `sherpa-onnx`. 30 to `sherpa-onnx`.
@@ -81,7 +81,9 @@ def main(): @@ -81,7 +81,9 @@ def main():
81 "model_type": "EncDecHybridRNNTCTCBPEModel", 81 "model_type": "EncDecHybridRNNTCTCBPEModel",
82 "version": "1", 82 "version": "1",
83 "model_author": "NeMo", 83 "model_author": "NeMo",
84 - "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}", 84 + "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}"
  85 + if "/" not in model_name
  86 + else f"https://huggingface.co/{model_name}",
85 "comment": "Only the CTC branch is exported", 87 "comment": "Only the CTC branch is exported",
86 "doc": args.doc, 88 "doc": args.doc,
87 } 89 }
@@ -85,7 +85,9 @@ def main(): @@ -85,7 +85,9 @@ def main():
85 "model_type": "EncDecHybridRNNTCTCBPEModel", 85 "model_type": "EncDecHybridRNNTCTCBPEModel",
86 "version": "1", 86 "version": "1",
87 "model_author": "NeMo", 87 "model_author": "NeMo",
88 - "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}", 88 + "url": f"https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/{model_name}"
  89 + if "/" not in model_name
  90 + else f"https://huggingface.co/{model_name}",
89 "comment": "Only the transducer branch is exported", 91 "comment": "Only the transducer branch is exported",
90 "doc": args.doc, 92 "doc": args.doc,
91 } 93 }
  1 +#!/usr/bin/env bash
  2 +# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang)
  3 +
  4 +set -ex
  5 +
  6 +log() {
  7 + # This function is from espnet
  8 + local fname=${BASH_SOURCE[1]##*/}
  9 + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
  10 +}
  11 +
  12 +# 2200 hours of Portuguese speech
  13 +url=https://huggingface.co/nvidia/stt_pt_fastconformer_hybrid_large_pc
  14 +name=$(basename $url)
  15 +name="nvidia/$name"
  16 +doc="STT PT FastConformer Hybrid Transducer-CTC Large transcribes text in upper and lower case Portuguese alphabet along with spaces, period, comma, question mark. This collection contains the Brazilian Portuguese FastConformer Hybrid (Transducer and CTC) Large model (around 115M parameters) with punctuation and capitalization trained on around 2200h hours of Portuguese speech. "
  17 +
  18 +log "Process $name at $url"
  19 +./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"
  20 +d=sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc
  21 +mkdir -p $d
  22 +mv -v model.onnx $d/
  23 +cp -v tokens.txt $d/
  24 +ls -lh $d
  25 +
  26 +mkdir test_wavs
  27 +pushd test_wavs
  28 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/pt_br.wav
  29 +popd
  30 +cp -a test_wavs $d
  31 +
  32 +d=sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8
  33 +mkdir -p $d
  34 +mv -v model.int8.onnx $d/
  35 +mv -v tokens.txt $d/
  36 +ls -lh $d
  37 +mv test_wavs $d
  38 +
  39 +python3 ./test-onnx-ctc-non-streaming.py \
  40 + --model $d/model.int8.onnx \
  41 + --tokens $d/tokens.txt \
  42 + --wav $d/test_wavs/pt_br.wav
  43 +
  44 +
  45 +# 2500 hours of German speech
  46 +url=https://huggingface.co/nvidia/stt_de_fastconformer_hybrid_large_pc
  47 +name=$(basename $url)
  48 +name="nvidia/$name"
  49 +doc="This model transcribes speech in upper and lower case German alphabet along with spaces, periods, commas, and question marks. It is a 'large' version of FastConformer Transducer-CTC (around 115M parameters) model. This is a hybrid model trained on two losses: Transducer (default) and CTC."
  50 +
  51 +log "Process $name at $url"
  52 +./export-onnx-ctc-non-streaming.py --model $name --doc "$doc"
  53 +d=sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc
  54 +mkdir -p $d
  55 +mv -v model.onnx $d/
  56 +cp -v tokens.txt $d/
  57 +ls -lh $d
  58 +
  59 +mkdir test_wavs
  60 +pushd test_wavs
  61 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/de.wav
  62 +popd
  63 +cp -a test_wavs $d
  64 +
  65 +d=sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8
  66 +mkdir -p $d
  67 +mv -v model.int8.onnx $d/
  68 +mv -v tokens.txt $d/
  69 +ls -lh $d
  70 +mv test_wavs $d
  71 +
  72 +python3 ./test-onnx-ctc-non-streaming.py \
  73 + --model $d/model.int8.onnx \
  74 + --tokens $d/tokens.txt \
  75 + --wav $d/test_wavs/de.wav
  1 +#!/usr/bin/env bash
  2 +# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang)
  3 +
  4 +set -ex
  5 +
  6 +log() {
  7 + # This function is from espnet
  8 + local fname=${BASH_SOURCE[1]##*/}
  9 + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
  10 +}
  11 +
  12 +# 2200 hours of Portuguese speech
  13 +url=https://huggingface.co/nvidia/stt_pt_fastconformer_hybrid_large_pc
  14 +name=$(basename $url)
  15 +name="nvidia/$name"
  16 +doc="STT PT FastConformer Hybrid Transducer-CTC Large transcribes text in upper and lower case Portuguese alphabet along with spaces, period, comma, question mark. This collection contains the Brazilian Portuguese FastConformer Hybrid (Transducer and CTC) Large model (around 115M parameters) with punctuation and capitalization trained on around 2200h hours of Portuguese speech. "
  17 +
  18 +log "Process $name at $url"
  19 +./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"
  20 +d=sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc
  21 +mkdir -p $d
  22 +mv -v encoder.onnx $d/
  23 +mv -v decoder.onnx $d/
  24 +mv -v joiner.onnx $d/
  25 +cp -v tokens.txt $d/
  26 +ls -lh $d
  27 +
  28 +mkdir test_wavs
  29 +pushd test_wavs
  30 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/pt_br.wav
  31 +popd
  32 +cp -a test_wavs $d
  33 +
  34 +d=sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8
  35 +mkdir -p $d
  36 +mv -v encoder.int8.onnx $d/
  37 +mv -v decoder.int8.onnx $d/
  38 +mv -v joiner.int8.onnx $d/
  39 +mv -v tokens.txt $d/
  40 +ls -lh $d
  41 +mv test_wavs $d
  42 +
  43 +python3 ./test-onnx-transducer-non-streaming.py \
  44 + --encoder $d/encoder.int8.onnx \
  45 + --decoder $d/decoder.int8.onnx \
  46 + --joiner $d/joiner.int8.onnx \
  47 + --tokens $d/tokens.txt \
  48 + --wav $d/test_wavs/pt_br.wav
  49 +
  50 +# 2500 hours of German speech
  51 +url=https://huggingface.co/nvidia/stt_de_fastconformer_hybrid_large_pc
  52 +name=$(basename $url)
  53 +name="nvidia/$name"
  54 +doc="This model transcribes speech in upper and lower case German alphabet along with spaces, periods, commas, and question marks. It is a 'large' version of FastConformer Transducer-CTC (around 115M parameters) model. This is a hybrid model trained on two losses: Transducer (default) and CTC."
  55 +
  56 +log "Process $name at $url"
  57 +./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"
  58 +d=sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc
  59 +mkdir -p $d
  60 +mv -v encoder.onnx $d/
  61 +mv -v decoder.onnx $d/
  62 +mv -v joiner.onnx $d/
  63 +cp -v tokens.txt $d/
  64 +ls -lh $d
  65 +
  66 +mkdir test_wavs
  67 +pushd test_wavs
  68 +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/de.wav
  69 +popd
  70 +cp -a test_wavs $d
  71 +
  72 +d=sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8
  73 +mkdir -p $d
  74 +mv -v encoder.int8.onnx $d/
  75 +mv -v decoder.int8.onnx $d/
  76 +mv -v joiner.int8.onnx $d/
  77 +mv -v tokens.txt $d/
  78 +ls -lh $d
  79 +mv test_wavs $d
  80 +
  81 +python3 ./test-onnx-transducer-non-streaming.py \
  82 + --encoder $d/encoder.int8.onnx \
  83 + --decoder $d/decoder.int8.onnx \
  84 + --joiner $d/joiner.int8.onnx \
  85 + --tokens $d/tokens.txt \
  86 + --wav $d/test_wavs/de.wav
@@ -621,6 +621,52 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { @@ -621,6 +621,52 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
621 tokens = "$modelDir/tokens.txt", 621 tokens = "$modelDir/tokens.txt",
622 ) 622 )
623 } 623 }
  624 +
  625 + 35 -> {
  626 + val modelDir = "sherpa-onnx-nemo-transducer-stt_pt_fastconformer_hybrid_large_pc-int8"
  627 + return OfflineModelConfig(
  628 + transducer = OfflineTransducerModelConfig(
  629 + encoder = "$modelDir/encoder.int8.onnx",
  630 + decoder = "$modelDir/decoder.int8.onnx",
  631 + joiner = "$modelDir/joiner.int8.onnx",
  632 + ),
  633 + tokens = "$modelDir/tokens.txt",
  634 + modelType = "nemo_transducer",
  635 + )
  636 + }
  637 +
  638 + 36 -> {
  639 + val modelDir = "sherpa-onnx-nemo-stt_pt_fastconformer_hybrid_large_pc-int8"
  640 + return OfflineModelConfig(
  641 + nemo = OfflineNemoEncDecCtcModelConfig(
  642 + model = "$modelDir/model.int8.onnx",
  643 + ),
  644 + tokens = "$modelDir/tokens.txt",
  645 + )
  646 + }
  647 +
  648 + 37 -> {
  649 + val modelDir = "sherpa-onnx-nemo-transducer-stt_de_fastconformer_hybrid_large_pc-int8"
  650 + return OfflineModelConfig(
  651 + transducer = OfflineTransducerModelConfig(
  652 + encoder = "$modelDir/encoder.int8.onnx",
  653 + decoder = "$modelDir/decoder.int8.onnx",
  654 + joiner = "$modelDir/joiner.int8.onnx",
  655 + ),
  656 + tokens = "$modelDir/tokens.txt",
  657 + modelType = "nemo_transducer",
  658 + )
  659 + }
  660 +
  661 + 38 -> {
  662 + val modelDir = "sherpa-onnx-nemo-stt_de_fastconformer_hybrid_large_pc-int8"
  663 + return OfflineModelConfig(
  664 + nemo = OfflineNemoEncDecCtcModelConfig(
  665 + model = "$modelDir/model.int8.onnx",
  666 + ),
  667 + tokens = "$modelDir/tokens.txt",
  668 + )
  669 + }
624 } 670 }
625 return null 671 return null
626 } 672 }