Fangjun Kuang
Committed by GitHub

Refactor exporting NeMo models (#2362)

Refactors and extends model export support to include new NeMo Parakeet TDT int8 variants for English and Japanese, updating the Kotlin API, export scripts, test runners, and CI workflows.

- Added support for two new int8 model types in OfflineRecognizer.kt.
- Enhanced Python export scripts to perform dynamic quantization and metadata injection.
- Updated shell scripts and GitHub workflows to package, test, and publish int8 model artifacts.
@@ -61,6 +61,11 @@ jobs: @@ -61,6 +61,11 @@ jobs:
61 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 61 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
62 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k 62 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
63 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000 63 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
  64 + sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
  65 + sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
  66 + sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
  67 + sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  68 + sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
64 ) 69 )
65 70
66 for m in ${models[@]}; do 71 for m in ${models[@]}; do
@@ -89,6 +94,11 @@ jobs: @@ -89,6 +94,11 @@ jobs:
89 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 94 sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
90 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k 95 sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
91 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000 96 sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
  97 + sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
  98 + sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
  99 + sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
  100 + sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  101 + sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
92 ) 102 )
93 for d in ${dirs[@]}; do 103 for d in ${dirs[@]}; do
94 tar cjvf ${d}.tar.bz2 ./$d 104 tar cjvf ${d}.tar.bz2 ./$d
@@ -54,13 +54,18 @@ jobs: @@ -54,13 +54,18 @@ jobs:
54 curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt 54 curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
55 popd 55 popd
56 56
57 - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms  
58 - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms  
59 - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms  
60 -  
61 - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms  
62 - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms  
63 - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms 57 + names=(
  58 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
  59 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
  60 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
  61 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
  62 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
  63 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
  64 + )
  65 + for d in ${names[@]}; do
  66 + cp -av test_wavs $d/
  67 + tar cjvf $d.tar.bz2 $d
  68 + done
64 69
65 - name: Release 70 - name: Release
66 uses: svenstaro/upload-release-action@v2 71 uses: svenstaro/upload-release-action@v2
@@ -71,3 +76,41 @@ jobs: @@ -71,3 +76,41 @@ jobs:
71 repo_name: k2-fsa/sherpa-onnx 76 repo_name: k2-fsa/sherpa-onnx
72 repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} 77 repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
73 tag: asr-models 78 tag: asr-models
  79 +
  80 + - name: Publish to huggingface
  81 + env:
  82 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  83 + uses: nick-fields/retry@v3
  84 + with:
  85 + max_attempts: 20
  86 + timeout_seconds: 200
  87 + shell: bash
  88 + command: |
  89 + git config --global user.email "csukuangfj@gmail.com"
  90 + git config --global user.name "Fangjun Kuang"
  91 +
  92 + models=(
  93 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms
  94 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms
  95 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms
  96 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-80ms-int8
  97 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-480ms-int8
  98 + sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-1040ms-int8
  99 + )
  100 +
  101 + for m in ${models[@]}; do
  102 + rm -rf huggingface
  103 + export GIT_LFS_SKIP_SMUDGE=1
  104 + export GIT_CLONE_PROTECTION_ACTIVE=false
  105 + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
  106 + cp -av $m/* huggingface
  107 + cd huggingface
  108 + git lfs track "*.onnx"
  109 + git lfs track "*.wav"
  110 + git status
  111 + git add .
  112 + git status
  113 + git commit -m "first commit"
  114 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
  115 + cd ..
  116 + done
@@ -61,6 +61,11 @@ jobs: @@ -61,6 +61,11 @@ jobs:
61 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 61 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
62 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k 62 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
63 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000 63 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
  64 + sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
  65 + sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
  66 + sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
  67 + sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  68 + sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
64 ) 69 )
65 70
66 for m in ${models[@]}; do 71 for m in ${models[@]}; do
@@ -88,6 +93,11 @@ jobs: @@ -88,6 +93,11 @@ jobs:
88 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 93 sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
89 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k 94 sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
90 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000 95 sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
  96 + sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
  97 + sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
  98 + sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
  99 + sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  100 + sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
91 ) 101 )
92 for d in ${dirs[@]}; do 102 for d in ${dirs[@]}; do
93 tar cjvf ${d}.tar.bz2 ./$d 103 tar cjvf ${d}.tar.bz2 ./$d
@@ -54,13 +54,18 @@ jobs: @@ -54,13 +54,18 @@ jobs:
54 curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt 54 curl -SL -O https://hf-mirror.com/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-small/resolve/main/test_wavs/trans.txt
55 popd 55 popd
56 56
57 - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms  
58 - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms  
59 - cp -av test_wavs ./sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms  
60 -  
61 - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms  
62 - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms  
63 - tar cjvf sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms.tar.bz2 sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms 57 + models=(
  58 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
  59 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
  60 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
  61 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
  62 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
  63 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
  64 + )
  65 + for m in ${models[@]}; do
  66 + cp -av test_wavs $m
  67 + tar cjvf $m.tar.bz2 $m
  68 + done
64 69
65 - name: Release 70 - name: Release
66 uses: svenstaro/upload-release-action@v2 71 uses: svenstaro/upload-release-action@v2
@@ -71,3 +76,41 @@ jobs: @@ -71,3 +76,41 @@ jobs:
71 repo_name: k2-fsa/sherpa-onnx 76 repo_name: k2-fsa/sherpa-onnx
72 repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} 77 repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
73 tag: asr-models 78 tag: asr-models
  79 +
  80 + - name: Publish to huggingface
  81 + env:
  82 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  83 + uses: nick-fields/retry@v3
  84 + with:
  85 + max_attempts: 20
  86 + timeout_seconds: 200
  87 + shell: bash
  88 + command: |
  89 + git config --global user.email "csukuangfj@gmail.com"
  90 + git config --global user.name "Fangjun Kuang"
  91 +
  92 + models=(
  93 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms
  94 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms
  95 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms
  96 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-80ms-int8
  97 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-480ms-int8
  98 + sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-1040ms-int8
  99 + )
  100 +
  101 + for m in ${models[@]}; do
  102 + rm -rf huggingface
  103 + export GIT_LFS_SKIP_SMUDGE=1
  104 + export GIT_CLONE_PROTECTION_ACTIVE=false
  105 + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
  106 + cp -av $m/* huggingface
  107 + cd huggingface
  108 + git lfs track "*.onnx"
  109 + git lfs track "*.wav"
  110 + git status
  111 + git add .
  112 + git status
  113 + git commit -m "first commit"
  114 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
  115 + cd ..
  116 + done
  1 +name: export-nemo-parakeet-tdt
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - refactor-export-nemo
  7 + workflow_dispatch:
  8 +
  9 +concurrency:
  10 + group: export-nemo-parakeet-tdt-${{ github.ref }}
  11 + cancel-in-progress: true
  12 +
  13 +jobs:
  14 + export-nemo-parakeet-tdt-0_6b-v2:
  15 + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
  16 + name: parakeet tdt
  17 + runs-on: ${{ matrix.os }}
  18 + strategy:
  19 + fail-fast: false
  20 + matrix:
  21 + os: [macos-latest]
  22 + python-version: ["3.10"]
  23 +
  24 + steps:
  25 + - uses: actions/checkout@v4
  26 +
  27 + - name: Setup Python ${{ matrix.python-version }}
  28 + uses: actions/setup-python@v5
  29 + with:
  30 + python-version: ${{ matrix.python-version }}
  31 +
  32 + - name: Install python dependencies
  33 + shell: bash
  34 + run: |
  35 + pip install \
  36 + nemo_toolkit['asr'] \
  37 + "numpy<2" \
  38 + ipython \
  39 + kaldi-native-fbank \
  40 + librosa \
  41 + onnx==1.17.0 \
  42 + onnxmltools==1.13.0 \
  43 + onnxruntime==1.17.1 \
  44 + soundfile
  45 +
  46 + - name: Run
  47 + shell: bash
  48 + run: |
  49 + cd scripts/nemo/parakeet-tdt_ctc-0.6b-ja
  50 + ./run-ctc.sh
  51 +
  52 + - name: Collect files
  53 + shell: bash
  54 + run: |
  55 + models=(
  56 + sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
  57 + )
  58 + for m in ${models[@]}; do
  59 + mv -v scripts/nemo/parakeet-tdt_ctc-0.6b-ja/$m .
  60 + tar cjfv $m.tar.bz2 $m
  61 + done
  62 +
  63 +
  64 + - name: Publish to huggingface
  65 + env:
  66 + HF_TOKEN: ${{ secrets.HF_TOKEN }}
  67 + uses: nick-fields/retry@v3
  68 + with:
  69 + max_attempts: 20
  70 + timeout_seconds: 200
  71 + shell: bash
  72 + command: |
  73 + git config --global user.email "csukuangfj@gmail.com"
  74 + git config --global user.name "Fangjun Kuang"
  75 +
  76 + models=(
  77 + sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8
  78 + )
  79 +
  80 + for m in ${models[@]}; do
  81 + rm -rf huggingface
  82 + export GIT_LFS_SKIP_SMUDGE=1
  83 + export GIT_CLONE_PROTECTION_ACTIVE=false
  84 + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m huggingface
  85 + cp -av $m/* huggingface
  86 + cd huggingface
  87 + git lfs track "*.onnx"
  88 + git lfs track "*.wav"
  89 + git status
  90 + git add .
  91 + git status
  92 + git commit -m "first commit"
  93 + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$m main
  94 + cd ..
  95 + done
  96 +
  97 + - name: Release
  98 + uses: svenstaro/upload-release-action@v2
  99 + with:
  100 + file_glob: true
  101 + file: ./*.tar.bz2
  102 + overwrite: true
  103 + repo_name: k2-fsa/sherpa-onnx
  104 + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
  105 + tag: asr-models
@@ -568,6 +568,38 @@ def get_models(): @@ -568,6 +568,38 @@ def get_models():
568 popd 568 popd
569 """, 569 """,
570 ), 570 ),
  571 + Model(
  572 + model_name="sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8",
  573 + idx=33,
  574 + lang="en",
  575 + lang2="English",
  576 + short_name="parakeet_tdt_ctc_110m",
  577 + cmd="""
  578 + pushd $model_name
  579 +
  580 + rm -rfv test_wavs
  581 +
  582 + ls -lh
  583 +
  584 + popd
  585 + """,
  586 + ),
  587 + Model(
  588 + model_name="sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8",
  589 + idx=34,
  590 + lang="ja",
  591 + lang2="Japanese",
  592 + short_name="parakeet-tdt_ctc_0.6b_ja",
  593 + cmd="""
  594 + pushd $model_name
  595 +
  596 + rm -rfv test_wavs
  597 +
  598 + ls -lh
  599 +
  600 + popd
  601 + """,
  602 + ),
571 ] 603 ]
572 return models 604 return models
573 605
@@ -23,5 +23,6 @@ This folder contains scripts for exporting models from @@ -23,5 +23,6 @@ This folder contains scripts for exporting models from
23 - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc 23 - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/stt_multilingual_fastconformer_hybrid_large_pc
24 24
25 - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m 25 - https://catalog.ngc.nvidia.com/orgs/nvidia/teams/nemo/models/parakeet-tdt_ctc-110m
  26 + - https://huggingface.co/nvidia/parakeet-tdt_ctc-0.6b-ja
26 27
27 to `sherpa-onnx`. 28 to `sherpa-onnx`.
@@ -6,6 +6,7 @@ from typing import Dict @@ -6,6 +6,7 @@ from typing import Dict
6 import nemo.collections.asr as nemo_asr 6 import nemo.collections.asr as nemo_asr
7 import onnx 7 import onnx
8 import torch 8 import torch
  9 +from onnxruntime.quantization import QuantType, quantize_dynamic
9 10
10 11
11 def get_args(): 12 def get_args():
@@ -86,6 +87,12 @@ def main(): @@ -86,6 +87,12 @@ def main():
86 } 87 }
87 add_meta_data(filename, meta_data) 88 add_meta_data(filename, meta_data)
88 89
  90 + quantize_dynamic(
  91 + model_input="./model.onnx",
  92 + model_output="./model.int8.onnx",
  93 + weight_type=QuantType.QUInt8,
  94 + )
  95 +
89 print("preprocessor", asr_model.cfg.preprocessor) 96 print("preprocessor", asr_model.cfg.preprocessor)
90 print(meta_data) 97 print(meta_data)
91 98
@@ -6,6 +6,7 @@ from typing import Dict @@ -6,6 +6,7 @@ from typing import Dict
6 import nemo.collections.asr as nemo_asr 6 import nemo.collections.asr as nemo_asr
7 import onnx 7 import onnx
8 import torch 8 import torch
  9 +from onnxruntime.quantization import QuantType, quantize_dynamic
9 10
10 11
11 def get_args(): 12 def get_args():
@@ -114,6 +115,11 @@ def main(): @@ -114,6 +115,11 @@ def main():
114 "comment": "Only the CTC branch is exported", 115 "comment": "Only the CTC branch is exported",
115 } 116 }
116 add_meta_data(filename, meta_data) 117 add_meta_data(filename, meta_data)
  118 + quantize_dynamic(
  119 + model_input="./model.onnx",
  120 + model_output="./model.int8.onnx",
  121 + weight_type=QuantType.QUInt8,
  122 + )
117 123
118 print(meta_data) 124 print(meta_data)
119 125
@@ -6,6 +6,7 @@ from typing import Dict @@ -6,6 +6,7 @@ from typing import Dict
6 import nemo.collections.asr as nemo_asr 6 import nemo.collections.asr as nemo_asr
7 import onnx 7 import onnx
8 import torch 8 import torch
  9 +from onnxruntime.quantization import QuantType, quantize_dynamic
9 10
10 11
11 def get_args(): 12 def get_args():
@@ -90,6 +91,13 @@ def main(): @@ -90,6 +91,13 @@ def main():
90 } 91 }
91 add_meta_data("encoder.onnx", meta_data) 92 add_meta_data("encoder.onnx", meta_data)
92 93
  94 + for m in ["encoder", "decoder", "joiner"]:
  95 + quantize_dynamic(
  96 + model_input=f"{m}.onnx",
  97 + model_output=f"{m}.int8.onnx",
  98 + weight_type=QuantType.QUInt8,
  99 + )
  100 +
93 print(meta_data) 101 print(meta_data)
94 102
95 103
@@ -6,6 +6,7 @@ from typing import Dict @@ -6,6 +6,7 @@ from typing import Dict
6 import nemo.collections.asr as nemo_asr 6 import nemo.collections.asr as nemo_asr
7 import onnx 7 import onnx
8 import torch 8 import torch
  9 +from onnxruntime.quantization import QuantType, quantize_dynamic
9 10
10 11
11 def get_args(): 12 def get_args():
@@ -122,6 +123,13 @@ def main(): @@ -122,6 +123,13 @@ def main():
122 } 123 }
123 add_meta_data("encoder.onnx", meta_data) 124 add_meta_data("encoder.onnx", meta_data)
124 125
  126 + for m in ["encoder", "decoder", "joiner"]:
  127 + quantize_dynamic(
  128 + model_input=f"{m}.onnx",
  129 + model_output=f"{m}.int8.onnx",
  130 + weight_type=QuantType.QUInt8,
  131 + )
  132 +
125 print(meta_data) 133 print(meta_data)
126 134
127 135
@@ -19,6 +19,12 @@ log "Process $name at $url" @@ -19,6 +19,12 @@ log "Process $name at $url"
19 d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000 19 d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000
20 mkdir -p $d 20 mkdir -p $d
21 mv -v model.onnx $d/ 21 mv -v model.onnx $d/
  22 +cp -v tokens.txt $d/
  23 +ls -lh $d
  24 +
  25 +d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
  26 +mkdir -p $d
  27 +mv -v model.int8.onnx $d/
22 mv -v tokens.txt $d/ 28 mv -v tokens.txt $d/
23 ls -lh $d 29 ls -lh $d
24 30
@@ -33,6 +39,12 @@ log "Process $name at $url" @@ -33,6 +39,12 @@ log "Process $name at $url"
33 d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500 39 d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500
34 mkdir -p $d 40 mkdir -p $d
35 mv -v model.onnx $d/ 41 mv -v model.onnx $d/
  42 +cp -v tokens.txt $d/
  43 +ls -lh $d
  44 +
  45 +d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
  46 +mkdir -p $d
  47 +mv -v model.int8.onnx $d/
36 mv -v tokens.txt $d/ 48 mv -v tokens.txt $d/
37 ls -lh $d 49 ls -lh $d
38 50
@@ -45,6 +57,12 @@ doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transduc @@ -45,6 +57,12 @@ doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transduc
45 d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424 57 d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424
46 mkdir -p $d 58 mkdir -p $d
47 mv -v model.onnx $d/ 59 mv -v model.onnx $d/
  60 +cp -v tokens.txt $d/
  61 +ls -lh $d
  62 +
  63 +d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
  64 +mkdir -p $d
  65 +mv -v model.int8.onnx $d/
48 mv -v tokens.txt $d/ 66 mv -v tokens.txt $d/
49 ls -lh $d 67 ls -lh $d
50 68
@@ -57,6 +75,12 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer @@ -57,6 +75,12 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer
57 d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 75 d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
58 mkdir -p $d 76 mkdir -p $d
59 mv -v model.onnx $d/ 77 mv -v model.onnx $d/
  78 +cp -v tokens.txt $d/
  79 +ls -lh $d
  80 +
  81 +d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
  82 +mkdir -p $d
  83 +mv -v model.int8.onnx $d/
60 mv -v tokens.txt $d/ 84 mv -v tokens.txt $d/
61 ls -lh $d 85 ls -lh $d
62 86
@@ -69,6 +93,12 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer @@ -69,6 +93,12 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer
69 d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k 93 d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
70 mkdir -p $d 94 mkdir -p $d
71 mv -v model.onnx $d/ 95 mv -v model.onnx $d/
  96 +cp -v tokens.txt $d/
  97 +ls -lh $d
  98 +
  99 +d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  100 +mkdir -p $d
  101 +mv -v model.int8.onnx $d/
72 mv -v tokens.txt $d/ 102 mv -v tokens.txt $d/
73 ls -lh $d 103 ls -lh $d
74 104
@@ -92,6 +122,16 @@ mkdir -p $d/test_wavs @@ -92,6 +122,16 @@ mkdir -p $d/test_wavs
92 cp en.wav $d/test_wavs/0.wav 122 cp en.wav $d/test_wavs/0.wav
93 cp -v $data/en-english.wav $d/test_wavs/1.wav 123 cp -v $data/en-english.wav $d/test_wavs/1.wav
94 124
  125 +d=sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8
  126 +python3 ./test-onnx-ctc-non-streaming.py \
  127 + --model $d/model.int8.onnx \
  128 + --tokens $d/tokens.txt \
  129 + --wav $data/en-english.wav
  130 +mkdir -p $d/test_wavs
  131 +
  132 +cp en.wav $d/test_wavs/0.wav
  133 +cp -v $data/en-english.wav $d/test_wavs/1.wav
  134 +
95 d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500 135 d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500
96 python3 ./test-onnx-ctc-non-streaming.py \ 136 python3 ./test-onnx-ctc-non-streaming.py \
97 --model $d/model.onnx \ 137 --model $d/model.onnx \
@@ -101,6 +141,15 @@ mkdir -p $d/test_wavs @@ -101,6 +141,15 @@ mkdir -p $d/test_wavs
101 cp en.wav $d/test_wavs/0.wav 141 cp en.wav $d/test_wavs/0.wav
102 cp -v $data/en-english.wav $d/test_wavs 142 cp -v $data/en-english.wav $d/test_wavs
103 143
  144 +d=sherpa-onnx-nemo-fast-conformer-ctc-en-24500-int8
  145 +python3 ./test-onnx-ctc-non-streaming.py \
  146 + --model $d/model.int8.onnx \
  147 + --tokens $d/tokens.txt \
  148 + --wav $data/en-english.wav
  149 +mkdir -p $d/test_wavs
  150 +cp en.wav $d/test_wavs/0.wav
  151 +cp -v $data/en-english.wav $d/test_wavs
  152 +
104 d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424 153 d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424
105 python3 ./test-onnx-ctc-non-streaming.py \ 154 python3 ./test-onnx-ctc-non-streaming.py \
106 --model $d/model.onnx \ 155 --model $d/model.onnx \
@@ -109,6 +158,14 @@ python3 ./test-onnx-ctc-non-streaming.py \ @@ -109,6 +158,14 @@ python3 ./test-onnx-ctc-non-streaming.py \
109 mkdir -p $d/test_wavs 158 mkdir -p $d/test_wavs
110 cp -v $data/es-spanish.wav $d/test_wavs 159 cp -v $data/es-spanish.wav $d/test_wavs
111 160
  161 +d=sherpa-onnx-nemo-fast-conformer-ctc-es-1424-int8
  162 +python3 ./test-onnx-ctc-non-streaming.py \
  163 + --model $d/model.int8.onnx \
  164 + --tokens $d/tokens.txt \
  165 + --wav $data/es-spanish.wav
  166 +mkdir -p $d/test_wavs
  167 +cp -v $data/es-spanish.wav $d/test_wavs
  168 +
112 d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288 169 d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288
113 mkdir -p $d/test_wavs 170 mkdir -p $d/test_wavs
114 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do 171 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
@@ -119,6 +176,16 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do @@ -119,6 +176,16 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
119 cp -v $data/$w $d/test_wavs 176 cp -v $data/$w $d/test_wavs
120 done 177 done
121 178
  179 +d=sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288-int8
  180 +mkdir -p $d/test_wavs
  181 +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
  182 + python3 ./test-onnx-ctc-non-streaming.py \
  183 + --model $d/model.int8.onnx \
  184 + --tokens $d/tokens.txt \
  185 + --wav $data/$w
  186 + cp -v $data/$w $d/test_wavs
  187 +done
  188 +
122 d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k 189 d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k
123 mkdir -p $d/test_wavs 190 mkdir -p $d/test_wavs
124 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do 191 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
@@ -128,3 +195,13 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.w @@ -128,3 +195,13 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.w
128 --wav $data/$w 195 --wav $data/$w
129 cp -v $data/$w $d/test_wavs 196 cp -v $data/$w $d/test_wavs
130 done 197 done
  198 +
  199 +d=sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  200 +mkdir -p $d/test_wavs
  201 +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
  202 + python3 ./test-onnx-ctc-non-streaming.py \
  203 + --model $d/model.int8.onnx \
  204 + --tokens $d/tokens.txt \
  205 + --wav $data/$w
  206 + cp -v $data/$w $d/test_wavs
  207 +done
@@ -17,11 +17,22 @@ ms=( @@ -17,11 +17,22 @@ ms=(
17 for m in ${ms[@]}; do 17 for m in ${ms[@]}; do
18 ./export-onnx-ctc.py --model $m 18 ./export-onnx-ctc.py --model $m
19 d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms 19 d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms
  20 +
  21 + d_int8=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms-int8
  22 +
20 if [ ! -f $d/model.onnx ]; then 23 if [ ! -f $d/model.onnx ]; then
21 - mkdir -p $d 24 + mkdir -p $d $d_int8
22 mv -v model.onnx $d/ 25 mv -v model.onnx $d/
23 - mv -v tokens.txt $d/ 26 + cp -v tokens.txt $d/
  27 +
  28 + mv -v model.int8.onnx $d_int8/
  29 + mv -v tokens.txt $d_int8/
  30 +
  31 + echo "---$d---"
24 ls -lh $d 32 ls -lh $d
  33 +
  34 + echo "---$d_int8---"
  35 + ls -lh $d_int8
25 fi 36 fi
26 done 37 done
27 38
@@ -29,8 +40,16 @@ done @@ -29,8 +40,16 @@ done
29 40
30 for m in ${ms[@]}; do 41 for m in ${ms[@]}; do
31 d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms 42 d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms
  43 + echo "---$d---"
32 python3 ./test-onnx-ctc.py \ 44 python3 ./test-onnx-ctc.py \
33 --model $d/model.onnx \ 45 --model $d/model.onnx \
34 --tokens $d/tokens.txt \ 46 --tokens $d/tokens.txt \
35 --wav ./0.wav 47 --wav ./0.wav
  48 +
  49 + d=sherpa-onnx-nemo-streaming-fast-conformer-ctc-en-${m}ms-int8
  50 + echo "---$d---"
  51 + python3 ./test-onnx-ctc.py \
  52 + --model $d/model.int8.onnx \
  53 + --tokens $d/tokens.txt \
  54 + --wav ./0.wav
36 done 55 done
@@ -18,7 +18,17 @@ log "Process $name at $url" @@ -18,7 +18,17 @@ log "Process $name at $url"
18 ./export-onnx-transducer-non-streaming.py --model $name --doc "$doc" 18 ./export-onnx-transducer-non-streaming.py --model $name --doc "$doc"
19 d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000 19 d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000
20 mkdir -p $d 20 mkdir -p $d
21 -mv -v *.onnx $d/ 21 +mv -v encoder.onnx $d/
  22 +mv -v decoder.onnx $d/
  23 +mv -v joiner.onnx $d/
  24 +cp -v tokens.txt $d/
  25 +ls -lh $d
  26 +
  27 +d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
  28 +mkdir -p $d
  29 +mv -v encoder.int8.onnx $d/
  30 +mv -v decoder.int8.onnx $d/
  31 +mv -v joiner.int8.onnx $d/
22 mv -v tokens.txt $d/ 32 mv -v tokens.txt $d/
23 ls -lh $d 33 ls -lh $d
24 34
@@ -32,7 +42,17 @@ log "Process $name at $url" @@ -32,7 +42,17 @@ log "Process $name at $url"
32 42
33 d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500 43 d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500
34 mkdir -p $d 44 mkdir -p $d
35 -mv -v *.onnx $d/ 45 +mv -v encoder.onnx $d/
  46 +mv -v decoder.onnx $d/
  47 +mv -v joiner.onnx $d/
  48 +cp -v tokens.txt $d/
  49 +ls -lh $d
  50 +
  51 +d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
  52 +mkdir -p $d
  53 +mv -v encoder.int8.onnx $d/
  54 +mv -v decoder.int8.onnx $d/
  55 +mv -v joiner.int8.onnx $d/
36 mv -v tokens.txt $d/ 56 mv -v tokens.txt $d/
37 ls -lh $d 57 ls -lh $d
38 58
@@ -44,7 +64,17 @@ doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transduc @@ -44,7 +64,17 @@ doc="This collection contains the Spanish FastConformer Hybrid (CTC and Transduc
44 64
45 d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424 65 d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424
46 mkdir -p $d 66 mkdir -p $d
47 -mv -v *.onnx $d/ 67 +mv -v encoder.onnx $d/
  68 +mv -v decoder.onnx $d/
  69 +mv -v joiner.onnx $d/
  70 +cp -v tokens.txt $d/
  71 +ls -lh $d
  72 +
  73 +d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
  74 +mkdir -p $d
  75 +mv -v encoder.int8.onnx $d/
  76 +mv -v decoder.int8.onnx $d/
  77 +mv -v joiner.int8.onnx $d/
48 mv -v tokens.txt $d/ 78 mv -v tokens.txt $d/
49 ls -lh $d 79 ls -lh $d
50 80
@@ -56,7 +86,17 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer @@ -56,7 +86,17 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer
56 86
57 d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 87 d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
58 mkdir -p $d 88 mkdir -p $d
59 -mv -v *.onnx $d/ 89 +mv -v encoder.onnx $d/
  90 +mv -v decoder.onnx $d/
  91 +mv -v joiner.onnx $d/
  92 +cp -v tokens.txt $d/
  93 +ls -lh $d
  94 +
  95 +d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
  96 +mkdir -p $d
  97 +mv -v encoder.int8.onnx $d/
  98 +mv -v decoder.int8.onnx $d/
  99 +mv -v joiner.int8.onnx $d/
60 mv -v tokens.txt $d/ 100 mv -v tokens.txt $d/
61 ls -lh $d 101 ls -lh $d
62 102
@@ -68,7 +108,17 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer @@ -68,7 +108,17 @@ doc="This collection contains the Multilingual FastConformer Hybrid (Transducer
68 108
69 d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k 109 d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
70 mkdir -p $d 110 mkdir -p $d
71 -mv -v *.onnx $d/ 111 +mv -v encoder.onnx $d/
  112 +mv -v decoder.onnx $d/
  113 +mv -v joiner.onnx $d/
  114 +cp -v tokens.txt $d/
  115 +ls -lh $d
  116 +
  117 +d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  118 +mkdir -p $d
  119 +mv -v encoder.int8.onnx $d/
  120 +mv -v decoder.int8.onnx $d/
  121 +mv -v joiner.int8.onnx $d/
72 mv -v tokens.txt $d/ 122 mv -v tokens.txt $d/
73 ls -lh $d 123 ls -lh $d
74 124
@@ -101,6 +151,25 @@ mkdir -p $d/test_wavs @@ -101,6 +151,25 @@ mkdir -p $d/test_wavs
101 cp en.wav $d/test_wavs/0.wav 151 cp en.wav $d/test_wavs/0.wav
102 cp -v $data/en-english.wav $d/test_wavs 152 cp -v $data/en-english.wav $d/test_wavs
103 153
  154 +d=sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000-int8
  155 +python3 ./test-onnx-transducer-non-streaming.py \
  156 + --encoder $d/encoder.int8.onnx \
  157 + --decoder $d/decoder.int8.onnx \
  158 + --joiner $d/joiner.int8.onnx \
  159 + --tokens $d/tokens.txt \
  160 + --wav $data/en-english.wav
  161 +
  162 +python3 ./test-onnx-transducer-non-streaming.py \
  163 + --encoder $d/encoder.int8.onnx \
  164 + --decoder $d/decoder.int8.onnx \
  165 + --joiner $d/joiner.int8.onnx \
  166 + --tokens $d/tokens.txt \
  167 + --wav ./en.wav
  168 +
  169 +mkdir -p $d/test_wavs
  170 +cp en.wav $d/test_wavs/0.wav
  171 +cp -v $data/en-english.wav $d/test_wavs
  172 +
104 d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500 173 d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500
105 python3 ./test-onnx-transducer-non-streaming.py \ 174 python3 ./test-onnx-transducer-non-streaming.py \
106 --encoder $d/encoder.onnx \ 175 --encoder $d/encoder.onnx \
@@ -112,6 +181,17 @@ mkdir -p $d/test_wavs @@ -112,6 +181,17 @@ mkdir -p $d/test_wavs
112 cp en.wav $d/test_wavs/0.wav 181 cp en.wav $d/test_wavs/0.wav
113 cp -v $data/en-english.wav $d/test_wavs 182 cp -v $data/en-english.wav $d/test_wavs
114 183
  184 +d=sherpa-onnx-nemo-fast-conformer-transducer-en-24500-int8
  185 +python3 ./test-onnx-transducer-non-streaming.py \
  186 + --encoder $d/encoder.int8.onnx \
  187 + --decoder $d/decoder.int8.onnx \
  188 + --joiner $d/joiner.int8.onnx \
  189 + --tokens $d/tokens.txt \
  190 + --wav $data/en-english.wav
  191 +mkdir -p $d/test_wavs
  192 +cp en.wav $d/test_wavs/0.wav
  193 +cp -v $data/en-english.wav $d/test_wavs
  194 +
115 d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424 195 d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424
116 python3 ./test-onnx-transducer-non-streaming.py \ 196 python3 ./test-onnx-transducer-non-streaming.py \
117 --encoder $d/encoder.onnx \ 197 --encoder $d/encoder.onnx \
@@ -122,6 +202,16 @@ python3 ./test-onnx-transducer-non-streaming.py \ @@ -122,6 +202,16 @@ python3 ./test-onnx-transducer-non-streaming.py \
122 mkdir -p $d/test_wavs 202 mkdir -p $d/test_wavs
123 cp -v $data/es-spanish.wav $d/test_wavs 203 cp -v $data/es-spanish.wav $d/test_wavs
124 204
  205 +d=sherpa-onnx-nemo-fast-conformer-transducer-es-1424-int8
  206 +python3 ./test-onnx-transducer-non-streaming.py \
  207 + --encoder $d/encoder.int8.onnx \
  208 + --decoder $d/decoder.int8.onnx \
  209 + --joiner $d/joiner.int8.onnx \
  210 + --tokens $d/tokens.txt \
  211 + --wav $data/es-spanish.wav
  212 +mkdir -p $d/test_wavs
  213 +cp -v $data/es-spanish.wav $d/test_wavs
  214 +
125 d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288 215 d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288
126 mkdir -p $d/test_wavs 216 mkdir -p $d/test_wavs
127 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do 217 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
@@ -134,6 +224,18 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do @@ -134,6 +224,18 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
134 cp -v $data/$w $d/test_wavs 224 cp -v $data/$w $d/test_wavs
135 done 225 done
136 226
  227 +d=sherpa-onnx-nemo-fast-conformer-transducer-en-de-es-fr-14288-int8
  228 +mkdir -p $d/test_wavs
  229 +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav; do
  230 + python3 ./test-onnx-transducer-non-streaming.py \
  231 + --encoder $d/encoder.int8.onnx \
  232 + --decoder $d/decoder.int8.onnx \
  233 + --joiner $d/joiner.int8.onnx \
  234 + --tokens $d/tokens.txt \
  235 + --wav $data/$w
  236 + cp -v $data/$w $d/test_wavs
  237 +done
  238 +
137 d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k 239 d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k
138 mkdir -p $d/test_wavs 240 mkdir -p $d/test_wavs
139 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do 241 for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
@@ -145,3 +247,15 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.w @@ -145,3 +247,15 @@ for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.w
145 --wav $data/$w 247 --wav $data/$w
146 cp -v $data/$w $d/test_wavs 248 cp -v $data/$w $d/test_wavs
147 done 249 done
  250 +
  251 +d=sherpa-onnx-nemo-fast-conformer-transducer-be-de-en-es-fr-hr-it-pl-ru-uk-20k-int8
  252 +mkdir -p $d/test_wavs
  253 +for w in en-english.wav de-german.wav es-spanish.wav fr-french.wav hr-croatian.wav it-italian.wav po-polish.wav ru-russian.wav uk-ukrainian.wav; do
  254 + python3 ./test-onnx-transducer-non-streaming.py \
  255 + --encoder $d/encoder.int8.onnx \
  256 + --decoder $d/decoder.int8.onnx \
  257 + --joiner $d/joiner.int8.onnx \
  258 + --tokens $d/tokens.txt \
  259 + --wav $data/$w
  260 + cp -v $data/$w $d/test_wavs
  261 +done
@@ -17,13 +17,24 @@ ms=( @@ -17,13 +17,24 @@ ms=(
17 for m in ${ms[@]}; do 17 for m in ${ms[@]}; do
18 ./export-onnx-transducer.py --model $m 18 ./export-onnx-transducer.py --model $m
19 d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms 19 d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms
  20 + d_int8=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms-int8
20 if [ ! -f $d/encoder.onnx ]; then 21 if [ ! -f $d/encoder.onnx ]; then
21 - mkdir -p $d 22 + mkdir -p $d $d_int8
22 mv -v encoder.onnx $d/ 23 mv -v encoder.onnx $d/
23 mv -v decoder.onnx $d/ 24 mv -v decoder.onnx $d/
24 mv -v joiner.onnx $d/ 25 mv -v joiner.onnx $d/
25 - mv -v tokens.txt $d/ 26 + cp -v tokens.txt $d/
  27 +
  28 + mv -v encoder.int8.onnx $d_int8/
  29 + mv -v decoder.int8.onnx $d_int8/
  30 + mv -v joiner.int8.onnx $d_int8/
  31 + mv -v tokens.txt $d_int8/
  32 +
  33 + echo "---$d---"
26 ls -lh $d 34 ls -lh $d
  35 +
  36 + echo "---$d_int8---"
  37 + ls -lh $d_int8
27 fi 38 fi
28 done 39 done
29 40
@@ -37,4 +48,12 @@ for m in ${ms[@]}; do @@ -37,4 +48,12 @@ for m in ${ms[@]}; do
37 --joiner $d/joiner.onnx \ 48 --joiner $d/joiner.onnx \
38 --tokens $d/tokens.txt \ 49 --tokens $d/tokens.txt \
39 --wav ./0.wav 50 --wav ./0.wav
  51 +
  52 + d=sherpa-onnx-nemo-streaming-fast-conformer-transducer-en-${m}ms-int8
  53 + python3 ./test-onnx-transducer.py \
  54 + --encoder $d/encoder.int8.onnx \
  55 + --decoder $d/decoder.int8.onnx \
  56 + --joiner $d/joiner.int8.onnx \
  57 + --tokens $d/tokens.txt \
  58 + --wav ./0.wav
40 done 59 done
  1 +#!/usr/bin/env python3
  2 +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang)
  3 +import os
  4 +from typing import Dict
  5 +
  6 +import nemo.collections.asr as nemo_asr
  7 +import onnx
  8 +import torch
  9 +from onnxruntime.quantization import QuantType, quantize_dynamic
  10 +
  11 +
def add_meta_data(filename: str, meta_data: Dict[str, str]):
    """Replace the metadata of an ONNX model file, rewriting it in place.

    Any metadata entries already stored in the model are discarded before
    the new ones are written.

    Args:
      filename:
        Path of the ONNX model to update. The file is overwritten.
      meta_data:
        Key-value pairs to store. Every value is converted with ``str()``
        before it is written.
    """
    onnx_model = onnx.load(filename)

    # Start from a clean slate so that stale entries do not survive.
    onnx_model.ClearField("metadata_props")

    for name in meta_data:
        entry = onnx_model.metadata_props.add()
        entry.key = name
        entry.value = str(meta_data[name])

    onnx.save(onnx_model, filename)
  31 +
  32 +
@torch.no_grad()
def main():
    """Export the CTC branch of nvidia/parakeet-tdt_ctc-0.6b-ja to int8 ONNX.

    The function performs the following steps, all relative to the current
    working directory:

      1. Download the pretrained NeMo model and print its config.
      2. Write ``tokens.txt`` with one ``<symbol> <id>`` pair per line,
         followed by ``<blk>`` as the last entry.
      3. Switch the model to CTC decoding and export it to ``model.onnx``.
      4. Dynamically quantize ``model.onnx`` into ``model.int8.onnx``.
      5. Attach metadata to ``model.int8.onnx`` only; ``model.onnx`` is
         left without metadata.
    """
    model_name = "nvidia/parakeet-tdt_ctc-0.6b-ja"
    url = f"https://huggingface.co/{model_name}"
    decoder_type = "ctc"
    filename = "model.onnx"
    int8_filename = "model.int8.onnx"

    asr_model = nemo_asr.models.ASRModel.from_pretrained(model_name=model_name)

    print(asr_model.cfg)
    print(asr_model)

    vocabulary = asr_model.joint.vocabulary
    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(vocabulary):
            f.write(f"{s} {i}\n")
        # The blank symbol gets the id right after the last vocabulary entry.
        # Using len() instead of the leaked loop variable keeps this correct
        # (and free of NameError) even if the vocabulary is empty.
        f.write(f"<blk> {len(vocabulary)}\n")
    print("Saved to tokens.txt")

    asr_model.change_decoding_strategy(decoder_type=decoder_type)
    asr_model.eval()

    asr_model.set_export_config({"decoder_type": decoder_type})

    asr_model.export(filename, onnx_opset_version=18)

    # "NA" in the preprocessor config is stored as an empty string.
    normalize_type = asr_model.cfg.preprocessor.normalize
    if normalize_type == "NA":
        normalize_type = ""

    meta_data = {
        "vocab_size": asr_model.decoder.vocab_size,
        "normalize_type": normalize_type,
        # NOTE(review): hard-coded; presumably matches the encoder's
        # subsampling factor for this model -- confirm against the config.
        "subsampling_factor": 8,
        "model_type": "EncDecHybridRNNTCTCBPEModel",
        "version": "1",
        "model_author": "NeMo",
        "url": url,
        "comment": "Only the CTC branch is exported",
        "doc": f"See {url}",
    }

    os.system("ls -lh *.onnx")

    quantize_dynamic(
        model_input=f"./{filename}",
        model_output=f"./{int8_filename}",
        weight_type=QuantType.QUInt8,
    )

    add_meta_data(int8_filename, meta_data)

    os.system("ls -lh *.onnx")

    print("preprocessor", asr_model.cfg.preprocessor)
    print(meta_data)


if __name__ == "__main__":
    main()
#!/usr/bin/env bash
#
# Export the model with ./export-onnx-ctc.py, download test waves, package
# the int8 model together with tokens.txt and the test waves into a release
# directory, and finally decode each test wave with the packaged model.
#
# The export step is expected to leave model.int8.onnx and tokens.txt in the
# current directory; both are picked up below.

set -euxo pipefail

python3 ./export-onnx-ctc.py

ls -lh *.onnx

wavs=(
  test_ja_1.wav
  test_ja_2.wav
)

mkdir -p test_wavs
pushd test_wavs
# --fail (-f) makes curl exit non-zero on an HTTP error instead of silently
# saving the server's error page as if it were the requested file.
#
# NOTE(review): transcripts.txt is fetched from huggingface.co while the
# waves come from hf-mirror.com; the hosts are kept as they were -- confirm
# whether the mix is intentional.
curl -fSL -O https://huggingface.co/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/transcripts.txt
for w in "${wavs[@]}"; do
  curl -fSL -O "https://hf-mirror.com/csukuangfj/reazonspeech-k2-v2-ja-en/resolve/main/test_wavs/$w"
done
popd

d=sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8

mkdir -p "$d"
mv -v model.int8.onnx "$d/"
cp -v tokens.txt "$d/"
cp -av test_wavs "$d"
ls -lh "$d"

# Decode every downloaded wave with the packaged int8 model.
for w in "${wavs[@]}"; do
  python3 ./test-onnx-ctc-non-streaming.py \
    --model "$d/model.int8.onnx" \
    --tokens "$d/tokens.txt" \
    --wav "$d/test_wavs/$w"
done
  1 +../fast-conformer-hybrid-transducer-ctc/test-onnx-ctc-non-streaming.py
@@ -601,6 +601,26 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { @@ -601,6 +601,26 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
601 tokens = "$modelDir/tokens.txt", 601 tokens = "$modelDir/tokens.txt",
602 ) 602 )
603 } 603 }
  604 +
  605 + 33 -> {
  606 + val modelDir = "sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8"
  607 + return OfflineModelConfig(
  608 + nemo = OfflineNemoEncDecCtcModelConfig(
  609 + model = "$modelDir/model.int8.onnx",
  610 + ),
  611 + tokens = "$modelDir/tokens.txt",
  612 + )
  613 + }
  614 +
  615 + 34 -> {
  616 + val modelDir = "sherpa-onnx-nemo-parakeet-tdt_ctc-0.6b-ja-35000-int8"
  617 + return OfflineModelConfig(
  618 + nemo = OfflineNemoEncDecCtcModelConfig(
  619 + model = "$modelDir/model.int8.onnx",
  620 + ),
  621 + tokens = "$modelDir/tokens.txt",
  622 + )
  623 + }
604 } 624 }
605 return null 625 return null
606 } 626 }