Committed by GitHub
Export https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3 to sherpa-onnx (#2500)
This PR adds support for the newer version (v3) of NVIDIA's parakeet-tdt-0.6b model by exporting it to sherpa-onnx format. The v3 model supports 25 languages, maintaining the same usage pattern as v2 but with improved language coverage.
Showing 10 changed files with 271 additions and 74 deletions
| 1 | -name: export-nemo-parakeet-tdt-0.6b-v2 | 1 | +name: export-nemo-parakeet-tdt-0.6b |
| 2 | 2 | ||
| 3 | on: | 3 | on: |
| 4 | push: | 4 | push: |
| @@ -10,81 +10,111 @@ concurrency: | @@ -10,81 +10,111 @@ concurrency: | ||
| 10 | group: export-nemo-parakeet-tdt-0.6b-v2-${{ github.ref }} | 10 | group: export-nemo-parakeet-tdt-0.6b-v2-${{ github.ref }} |
| 11 | cancel-in-progress: true | 11 | cancel-in-progress: true |
| 12 | 12 | ||
| 13 | +env: | ||
| 14 | + HF_HUB_ENABLE_HF_TRANSFER: "0" | ||
| 15 | + | ||
| 13 | jobs: | 16 | jobs: |
| 14 | - export-nemo-parakeet-tdt-0_6b-v2: | 17 | + export-nemo-parakeet-tdt-0_6b: |
| 15 | if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' | 18 | if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' |
| 16 | - name: parakeet tdt 0.6b v2 | 19 | + name: parakeet tdt 0.6b ${{ matrix.version }} |
| 17 | runs-on: ${{ matrix.os }} | 20 | runs-on: ${{ matrix.os }} |
| 18 | strategy: | 21 | strategy: |
| 19 | fail-fast: false | 22 | fail-fast: false |
| 20 | matrix: | 23 | matrix: |
| 21 | os: [macos-latest] | 24 | os: [macos-latest] |
| 22 | python-version: ["3.10"] | 25 | python-version: ["3.10"] |
| 26 | + version: ["v2", "v3"] | ||
| 23 | 27 | ||
| 24 | steps: | 28 | steps: |
| 25 | - uses: actions/checkout@v4 | 29 | - uses: actions/checkout@v4 |
| 26 | 30 | ||
| 31 | + - name: Show disk space | ||
| 32 | + run: | | ||
| 33 | + df -h | ||
| 34 | + | ||
| 35 | + # See https://github.com/vlayer-xyz/vlayer/pull/543/files | ||
| 36 | + # Free up disk space as the macOS runners end up using most for Xcode | ||
| 37 | + # versions we don't need and use iOS simulators. | ||
| 38 | + - name: Free up disk space | ||
| 39 | + run: | | ||
| 40 | + echo '*** Delete iOS simulators and their caches' | ||
| 41 | + xcrun simctl delete all | ||
| 42 | + sudo rm -rf ~/Library/Developer/CoreSimulator/Caches/* | ||
| 43 | + | ||
| 44 | + - name: Show disk space | ||
| 45 | + run: | | ||
| 46 | + df -h | ||
| 47 | + | ||
| 27 | - name: Setup Python ${{ matrix.python-version }} | 48 | - name: Setup Python ${{ matrix.python-version }} |
| 28 | uses: actions/setup-python@v5 | 49 | uses: actions/setup-python@v5 |
| 29 | with: | 50 | with: |
| 30 | python-version: ${{ matrix.python-version }} | 51 | python-version: ${{ matrix.python-version }} |
| 31 | 52 | ||
| 32 | - - name: Run | 53 | + - name: Run ${{ matrix.version }} |
| 54 | + if: matrix.version == 'v2' | ||
| 33 | shell: bash | 55 | shell: bash |
| 34 | run: | | 56 | run: | |
| 35 | cd scripts/nemo/parakeet-tdt-0.6b-v2 | 57 | cd scripts/nemo/parakeet-tdt-0.6b-v2 |
| 36 | ./run.sh | 58 | ./run.sh |
| 37 | 59 | ||
| 38 | ls -lh *.onnx | 60 | ls -lh *.onnx |
| 61 | + ls -lh *.weights | ||
| 62 | + | ||
| 39 | mv -v *.onnx ../../.. | 63 | mv -v *.onnx ../../.. |
| 64 | + mv -v *.weights ../../.. | ||
| 40 | mv -v tokens.txt ../../.. | 65 | mv -v tokens.txt ../../.. |
| 41 | mv 2086-149220-0033.wav ../../../0.wav | 66 | mv 2086-149220-0033.wav ../../../0.wav |
| 42 | 67 | ||
| 43 | - - name: Collect files (fp32) | 68 | + - name: Run ${{ matrix.version }} |
| 69 | + if: matrix.version == 'v3' | ||
| 44 | shell: bash | 70 | shell: bash |
| 45 | run: | | 71 | run: | |
| 46 | - d=sherpa-onnx-nemo-parakeet-tdt-0.6b-v2 | ||
| 47 | - mkdir -p $d | ||
| 48 | - cp encoder.int8.onnx $d | ||
| 49 | - cp decoder.onnx $d | ||
| 50 | - cp joiner.onnx $d | ||
| 51 | - cp tokens.txt $d | ||
| 52 | - | ||
| 53 | - mkdir $d/test_wavs | ||
| 54 | - cp 0.wav $d/test_wavs | 72 | + cd scripts/nemo/parakeet-tdt-0.6b-v3 |
| 73 | + ./run.sh | ||
| 55 | 74 | ||
| 56 | - tar cjfv $d.tar.bz2 $d | 75 | + ls -lh *.onnx |
| 76 | + mv -v *.onnx ../../.. | ||
| 77 | + mv -v *.weights ../../.. | ||
| 78 | + mv -v tokens.txt ../../.. | ||
| 79 | + mv *.wav ../../../ | ||
| 57 | 80 | ||
| 58 | - - name: Collect files (int8) | 81 | + - name: Collect files (fp32) |
| 59 | shell: bash | 82 | shell: bash |
| 60 | run: | | 83 | run: | |
| 61 | - d=sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8 | 84 | + version=${{ matrix.version }} |
| 85 | + d=sherpa-onnx-nemo-parakeet-tdt-0.6b-$version | ||
| 62 | mkdir -p $d | 86 | mkdir -p $d |
| 63 | - cp encoder.int8.onnx $d | ||
| 64 | - cp decoder.int8.onnx $d | ||
| 65 | - cp joiner.int8.onnx $d | ||
| 66 | - cp tokens.txt $d | 87 | + cp -v encoder.onnx $d |
| 88 | + cp -v encoder.weights $d | ||
| 89 | + cp -v decoder.onnx $d | ||
| 90 | + cp -v joiner.onnx $d | ||
| 91 | + cp -v tokens.txt $d | ||
| 67 | 92 | ||
| 68 | mkdir $d/test_wavs | 93 | mkdir $d/test_wavs |
| 69 | - cp 0.wav $d/test_wavs | 94 | + cp -v *.wav $d/test_wavs |
| 70 | 95 | ||
| 71 | - tar cjfv $d.tar.bz2 $d | 96 | + # tar cjfv $d.tar.bz2 $d |
| 97 | + | ||
| 98 | + # ls -lh *.tar.bz2 | ||
| 72 | 99 | ||
| 73 | - - name: Collect files (fp16) | 100 | + - name: Collect files (int8) |
| 74 | shell: bash | 101 | shell: bash |
| 75 | run: | | 102 | run: | |
| 76 | - d=sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-fp16 | 103 | + version=${{ matrix.version }} |
| 104 | + d=sherpa-onnx-nemo-parakeet-tdt-0.6b-$version-int8 | ||
| 77 | mkdir -p $d | 105 | mkdir -p $d |
| 78 | - cp encoder.fp16.onnx $d | ||
| 79 | - cp decoder.fp16.onnx $d | ||
| 80 | - cp joiner.fp16.onnx $d | ||
| 81 | - cp tokens.txt $d | 106 | + cp -v encoder.int8.onnx $d |
| 107 | + cp -v decoder.int8.onnx $d | ||
| 108 | + cp -v joiner.int8.onnx $d | ||
| 109 | + cp -v tokens.txt $d | ||
| 82 | 110 | ||
| 83 | mkdir $d/test_wavs | 111 | mkdir $d/test_wavs |
| 84 | - cp 0.wav $d/test_wavs | 112 | + cp -v *.wav $d/test_wavs |
| 85 | 113 | ||
| 86 | tar cjfv $d.tar.bz2 $d | 114 | tar cjfv $d.tar.bz2 $d |
| 87 | 115 | ||
| 116 | + ls -lh *.tar.bz2 | ||
| 117 | + | ||
| 88 | - name: Publish to huggingface | 118 | - name: Publish to huggingface |
| 89 | env: | 119 | env: |
| 90 | HF_TOKEN: ${{ secrets.HF_TOKEN }} | 120 | HF_TOKEN: ${{ secrets.HF_TOKEN }} |
| @@ -94,13 +124,13 @@ jobs: | @@ -94,13 +124,13 @@ jobs: | ||
| 94 | timeout_seconds: 200 | 124 | timeout_seconds: 200 |
| 95 | shell: bash | 125 | shell: bash |
| 96 | command: | | 126 | command: | |
| 127 | + version=${{ matrix.version }} | ||
| 97 | git config --global user.email "csukuangfj@gmail.com" | 128 | git config --global user.email "csukuangfj@gmail.com" |
| 98 | git config --global user.name "Fangjun Kuang" | 129 | git config --global user.name "Fangjun Kuang" |
| 99 | 130 | ||
| 100 | models=( | 131 | models=( |
| 101 | - sherpa-onnx-nemo-parakeet-tdt-0.6b-v2 | ||
| 102 | - sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8 | ||
| 103 | - sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-fp16 | 132 | + sherpa-onnx-nemo-parakeet-tdt-0.6b-$version |
| 133 | + sherpa-onnx-nemo-parakeet-tdt-0.6b-$version-int8 | ||
| 104 | ) | 134 | ) |
| 105 | 135 | ||
| 106 | for m in ${models[@]}; do | 136 | for m in ${models[@]}; do |
| @@ -112,6 +142,7 @@ jobs: | @@ -112,6 +142,7 @@ jobs: | ||
| 112 | cd huggingface | 142 | cd huggingface |
| 113 | git lfs track "*.onnx" | 143 | git lfs track "*.onnx" |
| 114 | git lfs track "*.wav" | 144 | git lfs track "*.wav" |
| 145 | + git lfs track "*.weights" | ||
| 115 | git status | 146 | git status |
| 116 | git add . | 147 | git add . |
| 117 | git status | 148 | git status |
| @@ -681,6 +681,22 @@ def get_models(): | @@ -681,6 +681,22 @@ def get_models(): | ||
| 681 | popd | 681 | popd |
| 682 | """, | 682 | """, |
| 683 | ), | 683 | ), |
| 684 | + Model( | ||
| 685 | + model_name="sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8", | ||
| 686 | + idx=40, | ||
| 687 | + lang="multi", | ||
| 688 | + lang2="25_languages", | ||
| 689 | + short_name="parakeet_tdt_0.6b_v3", | ||
| 690 | + cmd=""" | ||
| 691 | + pushd $model_name | ||
| 692 | + | ||
| 693 | + rm -rfv test_wavs | ||
| 694 | + | ||
| 695 | + ls -lh | ||
| 696 | + | ||
| 697 | + popd | ||
| 698 | + """, | ||
| 699 | + ), | ||
| 684 | ] | 700 | ] |
| 685 | return models | 701 | return models |
| 686 | 702 |
| 1 | #!/usr/bin/env python3 | 1 | #!/usr/bin/env python3 |
| 2 | # Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | 2 | # Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) |
| 3 | 3 | ||
| 4 | +import os | ||
| 4 | from pathlib import Path | 5 | from pathlib import Path |
| 5 | from typing import Dict | 6 | from typing import Dict |
| 6 | -import os | ||
| 7 | 7 | ||
| 8 | import nemo.collections.asr as nemo_asr | 8 | import nemo.collections.asr as nemo_asr |
| 9 | import onnx | 9 | import onnx |
| 10 | -import onnxmltools | ||
| 11 | import torch | 10 | import torch |
| 12 | -from onnxmltools.utils.float16_converter import ( | ||
| 13 | - convert_float_to_float16, | ||
| 14 | - convert_float_to_float16_model_path, | ||
| 15 | -) | ||
| 16 | from onnxruntime.quantization import QuantType, quantize_dynamic | 11 | from onnxruntime.quantization import QuantType, quantize_dynamic |
| 17 | 12 | ||
| 18 | 13 | ||
| 19 | -def export_onnx_fp16(onnx_fp32_path, onnx_fp16_path): | ||
| 20 | - onnx_fp32_model = onnxmltools.utils.load_model(onnx_fp32_path) | ||
| 21 | - onnx_fp16_model = convert_float_to_float16(onnx_fp32_model, keep_io_types=True) | ||
| 22 | - onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path) | ||
| 23 | - | ||
| 24 | - | ||
| 25 | -def export_onnx_fp16_large_2gb(onnx_fp32_path, onnx_fp16_path): | ||
| 26 | - onnx_fp16_model = convert_float_to_float16_model_path( | ||
| 27 | - onnx_fp32_path, keep_io_types=True | ||
| 28 | - ) | ||
| 29 | - onnxmltools.utils.save_model(onnx_fp16_model, onnx_fp16_path) | ||
| 30 | - | ||
| 31 | - | ||
| 32 | def add_meta_data(filename: str, meta_data: Dict[str, str]): | 14 | def add_meta_data(filename: str, meta_data: Dict[str, str]): |
| 33 | """Add meta data to an ONNX model. It is changed in-place. | 15 | """Add meta data to an ONNX model. It is changed in-place. |
| 34 | 16 | ||
| @@ -47,14 +29,29 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]): | @@ -47,14 +29,29 @@ def add_meta_data(filename: str, meta_data: Dict[str, str]): | ||
| 47 | meta.key = key | 29 | meta.key = key |
| 48 | meta.value = str(value) | 30 | meta.value = str(value) |
| 49 | 31 | ||
| 50 | - onnx.save(model, filename) | 32 | + if filename == "encoder.onnx": |
| 33 | + external_filename = "encoder" | ||
| 34 | + onnx.save( | ||
| 35 | + model, | ||
| 36 | + filename, | ||
| 37 | + save_as_external_data=True, | ||
| 38 | + all_tensors_to_one_file=True, | ||
| 39 | + location=external_filename + ".weights", | ||
| 40 | + ) | ||
| 41 | + else: | ||
| 42 | + onnx.save(model, filename) | ||
| 51 | 43 | ||
| 52 | 44 | ||
| 53 | @torch.no_grad() | 45 | @torch.no_grad() |
| 54 | def main(): | 46 | def main(): |
| 55 | - asr_model = nemo_asr.models.ASRModel.from_pretrained( | ||
| 56 | - model_name="nvidia/parakeet-tdt-0.6b-v2" | ||
| 57 | - ) | 47 | + if Path("./parakeet-tdt-0.6b-v2.nemo").is_file(): |
| 48 | + asr_model = nemo_asr.models.ASRModel.restore_from( | ||
| 49 | + restore_path="./parakeet-tdt-0.6b-v2.nemo" | ||
| 50 | + ) | ||
| 51 | + else: | ||
| 52 | + asr_model = nemo_asr.models.ASRModel.from_pretrained( | ||
| 53 | + model_name="nvidia/parakeet-tdt-0.6b-v2" | ||
| 54 | + ) | ||
| 58 | 55 | ||
| 59 | asr_model.eval() | 56 | asr_model.eval() |
| 60 | 57 | ||
| @@ -95,13 +92,8 @@ def main(): | @@ -95,13 +92,8 @@ def main(): | ||
| 95 | ) | 92 | ) |
| 96 | os.system("ls -lh *.onnx") | 93 | os.system("ls -lh *.onnx") |
| 97 | 94 | ||
| 98 | - if m == "encoder": | ||
| 99 | - export_onnx_fp16_large_2gb(f"{m}.onnx", f"{m}.fp16.onnx") | ||
| 100 | - else: | ||
| 101 | - export_onnx_fp16(f"{m}.onnx", f"{m}.fp16.onnx") | ||
| 102 | - | ||
| 103 | add_meta_data("encoder.int8.onnx", meta_data) | 95 | add_meta_data("encoder.int8.onnx", meta_data) |
| 104 | - add_meta_data("encoder.fp16.onnx", meta_data) | 96 | + add_meta_data("encoder.onnx", meta_data) |
| 105 | print("meta_data", meta_data) | 97 | print("meta_data", meta_data) |
| 106 | 98 | ||
| 107 | 99 |
| @@ -9,8 +9,9 @@ log() { | @@ -9,8 +9,9 @@ log() { | ||
| 9 | echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | 9 | echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" |
| 10 | } | 10 | } |
| 11 | 11 | ||
| 12 | -curl -SL -O https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav | 12 | +curl -SL -O https://huggingface.co/nvidia/parakeet-tdt-0.6b-v2/resolve/main/parakeet-tdt-0.6b-v2.nemo |
| 13 | 13 | ||
| 14 | +curl -SL -O https://dldata-public.s3.us-east-2.amazonaws.com/2086-149220-0033.wav | ||
| 14 | 15 | ||
| 15 | 16 | ||
| 16 | pip install \ | 17 | pip install \ |
| @@ -20,7 +21,6 @@ pip install \ | @@ -20,7 +21,6 @@ pip install \ | ||
| 20 | kaldi-native-fbank \ | 21 | kaldi-native-fbank \ |
| 21 | librosa \ | 22 | librosa \ |
| 22 | onnx==1.17.0 \ | 23 | onnx==1.17.0 \ |
| 23 | - onnxmltools \ | ||
| 24 | onnxruntime==1.17.1 \ | 24 | onnxruntime==1.17.1 \ |
| 25 | soundfile | 25 | soundfile |
| 26 | 26 | ||
| @@ -42,11 +42,3 @@ python3 ./test_onnx.py \ | @@ -42,11 +42,3 @@ python3 ./test_onnx.py \ | ||
| 42 | --joiner ./joiner.int8.onnx \ | 42 | --joiner ./joiner.int8.onnx \ |
| 43 | --tokens ./tokens.txt \ | 43 | --tokens ./tokens.txt \ |
| 44 | --wav 2086-149220-0033.wav | 44 | --wav 2086-149220-0033.wav |
| 45 | - | ||
| 46 | -echo "---fp16----" | ||
| 47 | -python3 ./test_onnx.py \ | ||
| 48 | - --encoder ./encoder.fp16.onnx \ | ||
| 49 | - --decoder ./decoder.fp16.onnx \ | ||
| 50 | - --joiner ./joiner.fp16.onnx \ | ||
| 51 | - --tokens ./tokens.txt \ | ||
| 52 | - --wav 2086-149220-0033.wav |
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| 3 | + | ||
| 4 | +from pathlib import Path | ||
| 5 | +from typing import Dict | ||
| 6 | +import os | ||
| 7 | + | ||
| 8 | +import nemo.collections.asr as nemo_asr | ||
| 9 | +import onnx | ||
| 10 | +import torch | ||
| 11 | +from onnxruntime.quantization import QuantType, quantize_dynamic | ||
| 12 | + | ||
| 13 | + | ||
| 14 | +def add_meta_data(filename: str, meta_data: Dict[str, str]): | ||
| 15 | + """Add meta data to an ONNX model. It is changed in-place. | ||
| 16 | + | ||
| 17 | + Args: | ||
| 18 | + filename: | ||
| 19 | + Filename of the ONNX model to be changed. | ||
| 20 | + meta_data: | ||
| 21 | + Key-value pairs. | ||
| 22 | + """ | ||
| 23 | + model = onnx.load(filename) | ||
| 24 | + while len(model.metadata_props): | ||
| 25 | + model.metadata_props.pop() | ||
| 26 | + | ||
| 27 | + for key, value in meta_data.items(): | ||
| 28 | + meta = model.metadata_props.add() | ||
| 29 | + meta.key = key | ||
| 30 | + meta.value = str(value) | ||
| 31 | + | ||
| 32 | + if filename == "encoder.onnx": | ||
| 33 | + external_filename = "encoder" | ||
| 34 | + onnx.save( | ||
| 35 | + model, | ||
| 36 | + filename, | ||
| 37 | + save_as_external_data=True, | ||
| 38 | + all_tensors_to_one_file=True, | ||
| 39 | + location=external_filename + ".weights", | ||
| 40 | + ) | ||
| 41 | + else: | ||
| 42 | + onnx.save(model, filename) | ||
| 43 | + | ||
| 44 | + | ||
| 45 | +@torch.no_grad() | ||
| 46 | +def main(): | ||
| 47 | + if Path("./parakeet-tdt-0.6b-v3.nemo").is_file(): | ||
| 48 | + asr_model = nemo_asr.models.ASRModel.restore_from( | ||
| 49 | + restore_path="./parakeet-tdt-0.6b-v3.nemo" | ||
| 50 | + ) | ||
| 51 | + else: | ||
| 52 | + asr_model = nemo_asr.models.ASRModel.from_pretrained( | ||
| 53 | + model_name="nvidia/parakeet-tdt-0.6b-v3" | ||
| 54 | + ) | ||
| 55 | + | ||
| 56 | + asr_model.eval() | ||
| 57 | + | ||
| 58 | + with open("./tokens.txt", "w", encoding="utf-8") as f: | ||
| 59 | + for i, s in enumerate(asr_model.joint.vocabulary): | ||
| 60 | + f.write(f"{s} {i}\n") | ||
| 61 | + f.write(f"<blk> {i+1}\n") | ||
| 62 | + print("Saved to tokens.txt") | ||
| 63 | + | ||
| 64 | + asr_model.encoder.export("encoder.onnx") | ||
| 65 | + asr_model.decoder.export("decoder.onnx") | ||
| 66 | + asr_model.joint.export("joiner.onnx") | ||
| 67 | + os.system("ls -lh *.onnx") | ||
| 68 | + | ||
| 69 | + normalize_type = asr_model.cfg.preprocessor.normalize | ||
| 70 | + if normalize_type == "NA": | ||
| 71 | + normalize_type = "" | ||
| 72 | + | ||
| 73 | + meta_data = { | ||
| 74 | + "vocab_size": asr_model.decoder.vocab_size, | ||
| 75 | + "normalize_type": normalize_type, | ||
| 76 | + "pred_rnn_layers": asr_model.decoder.pred_rnn_layers, | ||
| 77 | + "pred_hidden": asr_model.decoder.pred_hidden, | ||
| 78 | + "subsampling_factor": 8, | ||
| 79 | + "model_type": "EncDecRNNTBPEModel", | ||
| 80 | + "version": "2", | ||
| 81 | + "model_author": "NeMo", | ||
| 82 | + "url": "https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3", | ||
| 83 | + "comment": "Only the transducer branch is exported", | ||
| 84 | + "feat_dim": 128, | ||
| 85 | + } | ||
| 86 | + | ||
| 87 | + for m in ["encoder", "decoder", "joiner"]: | ||
| 88 | + quantize_dynamic( | ||
| 89 | + model_input=f"./{m}.onnx", | ||
| 90 | + model_output=f"./{m}.int8.onnx", | ||
| 91 | + weight_type=QuantType.QUInt8 if m == "encoder" else QuantType.QInt8, | ||
| 92 | + ) | ||
| 93 | + os.system("ls -lh *.onnx") | ||
| 94 | + | ||
| 95 | + add_meta_data("encoder.int8.onnx", meta_data) | ||
| 96 | + add_meta_data("encoder.onnx", meta_data) | ||
| 97 | + print("meta_data", meta_data) | ||
| 98 | + | ||
| 99 | + | ||
| 100 | +if __name__ == "__main__": | ||
| 101 | + main() |
scripts/nemo/parakeet-tdt-0.6b-v3/run.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Copyright 2025 Xiaomi Corp. (authors: Fangjun Kuang) | ||
| 3 | + | ||
| 4 | +set -ex | ||
| 5 | + | ||
| 6 | +log() { | ||
| 7 | + # This function is from espnet | ||
| 8 | + local fname=${BASH_SOURCE[1]##*/} | ||
| 9 | + echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*" | ||
| 10 | +} | ||
| 11 | + | ||
| 12 | +curl -SL -O https://huggingface.co/nvidia/parakeet-tdt-0.6b-v3/resolve/main/parakeet-tdt-0.6b-v3.nemo | ||
| 13 | + | ||
| 14 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/en.wav | ||
| 15 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/de.wav | ||
| 16 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/fr.wav | ||
| 17 | +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/es.wav | ||
| 18 | + | ||
| 19 | +ls -lh | ||
| 20 | + | ||
| 21 | + | ||
| 22 | +pip install \ | ||
| 23 | + nemo_toolkit['asr'] \ | ||
| 24 | + "numpy<2" \ | ||
| 25 | + ipython \ | ||
| 26 | + kaldi-native-fbank \ | ||
| 27 | + librosa \ | ||
| 28 | + onnx==1.17.0 \ | ||
| 29 | + onnxruntime==1.17.1 \ | ||
| 30 | + soundfile | ||
| 31 | + | ||
| 32 | +python3 ./export_onnx.py | ||
| 33 | +ls -lh *.onnx | ||
| 34 | + | ||
| 35 | +for w in en.wav de.wav fr.wav es.wav; do | ||
| 36 | + echo "---fp32----" | ||
| 37 | + python3 ./test_onnx.py \ | ||
| 38 | + --encoder ./encoder.int8.onnx \ | ||
| 39 | + --decoder ./decoder.onnx \ | ||
| 40 | + --joiner ./joiner.onnx \ | ||
| 41 | + --tokens ./tokens.txt \ | ||
| 42 | + --wav $w | ||
| 43 | + | ||
| 44 | + echo "---int8----" | ||
| 45 | + python3 ./test_onnx.py \ | ||
| 46 | + --encoder ./encoder.int8.onnx \ | ||
| 47 | + --decoder ./decoder.int8.onnx \ | ||
| 48 | + --joiner ./joiner.int8.onnx \ | ||
| 49 | + --tokens ./tokens.txt \ | ||
| 50 | + --wav $w | ||
| 51 | +done |
| 1 | +../parakeet-tdt-0.6b-v2/test_onnx.py |
| @@ -46,7 +46,7 @@ bool OfflineTtsModelConfig::Validate() const { | @@ -46,7 +46,7 @@ bool OfflineTtsModelConfig::Validate() const { | ||
| 46 | return kitten.Validate(); | 46 | return kitten.Validate(); |
| 47 | } | 47 | } |
| 48 | 48 | ||
| 49 | - SHERPA_ONNX_LOGE("Please provide at exactly one tts model."); | 49 | + SHERPA_ONNX_LOGE("Please provide exactly one tts model."); |
| 50 | 50 | ||
| 51 | return false; | 51 | return false; |
| 52 | } | 52 | } |
| @@ -65,7 +65,7 @@ struct GeneratedAudio { | @@ -65,7 +65,7 @@ struct GeneratedAudio { | ||
| 65 | 65 | ||
| 66 | class OfflineTtsImpl; | 66 | class OfflineTtsImpl; |
| 67 | 67 | ||
| 68 | -// If the callback returns 0, then it stop generating | 68 | +// If the callback returns 0, then it stops generating |
| 69 | // if the callback returns 1, then it keeps generating | 69 | // if the callback returns 1, then it keeps generating |
| 70 | using GeneratedAudioCallback = std::function<int32_t( | 70 | using GeneratedAudioCallback = std::function<int32_t( |
| 71 | const float * /*samples*/, int32_t /*n*/, float /*progress*/)>; | 71 | const float * /*samples*/, int32_t /*n*/, float /*progress*/)>; |
| @@ -677,6 +677,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | @@ -677,6 +677,19 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? { | ||
| 677 | tokens = "$modelDir/tokens.txt", | 677 | tokens = "$modelDir/tokens.txt", |
| 678 | ) | 678 | ) |
| 679 | } | 679 | } |
| 680 | + | ||
| 681 | + 40 -> { | ||
| 682 | + val modelDir = "sherpa-onnx-nemo-parakeet-tdt-0.6b-v3-int8" | ||
| 683 | + return OfflineModelConfig( | ||
| 684 | + transducer = OfflineTransducerModelConfig( | ||
| 685 | + encoder = "$modelDir/encoder.int8.onnx", | ||
| 686 | + decoder = "$modelDir/decoder.int8.onnx", | ||
| 687 | + joiner = "$modelDir/joiner.int8.onnx", | ||
| 688 | + ), | ||
| 689 | + tokens = "$modelDir/tokens.txt", | ||
| 690 | + modelType = "nemo_transducer", | ||
| 691 | + ) | ||
| 692 | + } | ||
| 680 | } | 693 | } |
| 681 | return null | 694 | return null |
| 682 | } | 695 | } |
-
Please register or log in to post a comment