Committed by
GitHub
Add support for GigaAM-CTC-v2 (#2135)
Related to #2098.
正在显示
3 个修改的文件
包含
133 行增加
和
0 行删除
| @@ -79,6 +79,31 @@ jobs: | @@ -79,6 +79,31 @@ jobs: | ||
| 79 | 79 | ||
| 80 | tar cjvf ${d}.tar.bz2 $d | 80 | tar cjvf ${d}.tar.bz2 $d |
| 81 | 81 | ||
| 82 | + - name: Run CTC v2 | ||
| 83 | + shell: bash | ||
| 84 | + run: | | ||
| 85 | + pushd scripts/nemo/GigaAM | ||
| 86 | + ./run-ctc-v2.sh | ||
| 87 | + popd | ||
| 88 | + | ||
| 89 | + d=sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19 | ||
| 90 | + mkdir $d | ||
| 91 | + mkdir $d/test_wavs | ||
| 92 | + rm scripts/nemo/GigaAM/v2_ctc.onnx | ||
| 93 | + mv -v scripts/nemo/GigaAM/*.int8.onnx $d/ | ||
| 94 | + cp -v scripts/nemo/GigaAM/LICENCE $d/ | ||
| 95 | + mv -v scripts/nemo/GigaAM/tokens.txt $d/ | ||
| 96 | + mv -v scripts/nemo/GigaAM/*.wav $d/test_wavs/ | ||
| 97 | + mv -v scripts/nemo/GigaAM/run-ctc.sh $d/ | ||
| 98 | + mv -v scripts/nemo/GigaAM/*-ctc-v2.py $d/ | ||
| 99 | + | ||
| 100 | + ls -lh scripts/nemo/GigaAM/ | ||
| 101 | + | ||
| 102 | + ls -lh $d | ||
| 103 | + | ||
| 104 | + tar cjvf ${d}.tar.bz2 $d | ||
| 105 | + | ||
| 106 | + | ||
| 82 | - name: Release | 107 | - name: Release |
| 83 | uses: svenstaro/upload-release-action@v2 | 108 | uses: svenstaro/upload-release-action@v2 |
| 84 | with: | 109 | with: |
| @@ -114,3 +139,29 @@ jobs: | @@ -114,3 +139,29 @@ jobs: | ||
| 114 | git status | 139 | git status |
| 115 | git commit -m "add models" | 140 | git commit -m "add models" |
| 116 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main | 141 | git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main |
| 142 | + | ||
| 143 | + - name: Publish v2 to huggingface (Transducer) | ||
| 144 | + env: | ||
| 145 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 146 | + uses: nick-fields/retry@v3 | ||
| 147 | + with: | ||
| 148 | + max_attempts: 20 | ||
| 149 | + timeout_seconds: 200 | ||
| 150 | + shell: bash | ||
| 151 | + command: | | ||
| 152 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 153 | + git config --global user.name "Fangjun Kuang" | ||
| 154 | + | ||
| 155 | + d=sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19/ | ||
| 156 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 157 | + export GIT_CLONE_PROTECTION_ACTIVE=false | ||
| 158 | + git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d huggingface | ||
| 159 | + mv -v $d/* ./huggingface | ||
| 160 | + cd huggingface | ||
| 161 | + git lfs track "*.onnx" | ||
| 162 | + git lfs track "*.wav" | ||
| 163 | + git status | ||
| 164 | + git add . | ||
| 165 | + git status | ||
| 166 | + git commit -m "add models" | ||
| 167 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main |
scripts/nemo/GigaAM/export-onnx-ctc-v2.py
0 → 100644
| 1 | +import gigaam | ||
| 2 | +import onnx | ||
| 3 | +import torch | ||
| 4 | +from onnxruntime.quantization import QuantType, quantize_dynamic | ||
| 5 | + | ||
| 6 | + | ||
def add_meta_data(filename: str, meta_data: dict[str, object]):
    """Replace the metadata of an ONNX model file. It is changed in-place.

    Every existing ``metadata_props`` entry is removed first, then one entry
    is added per item of ``meta_data`` and the model is saved back to
    ``filename``.

    Args:
      filename:
        Filename of the ONNX model to be changed.
      meta_data:
        Key-value pairs. Each value is converted with ``str()`` before it is
        stored, so non-string values such as integers are accepted.
    """
    model = onnx.load(filename)

    # Drop whatever metadata the model already carries.
    while model.metadata_props:
        model.metadata_props.pop()

    for key, value in meta_data.items():
        entry = model.metadata_props.add()
        entry.key = key
        entry.value = str(value)

    onnx.save(model, filename)
| 26 | + | ||
| 27 | + | ||
def main() -> None:
    """Export the GigaAM ``v2_ctc`` model for use with ONNX.

    Steps, all relative to the current directory:

      1. Load the model via ``gigaam.load_model``.
      2. Write ``tokens.txt`` with one ``<symbol> <id>`` line per label,
         followed by ``<blk>`` as the last ID.
      3. Call ``model.to_onnx(".")`` and attach metadata to
         ``./v2_ctc.onnx``.
      4. Write a dynamically quantized copy to ``./model.int8.onnx``.
    """
    model_name = "v2_ctc"
    model = gigaam.load_model(
        model_name, fp16_encoder=False, use_flash=False, download_root="."
    )

    labels = model.cfg["labels"]
    # The blank symbol takes the ID right after the last label. Computing it
    # from len(labels) avoids depending on the loop variable, which would be
    # undefined if there were no labels.
    blank_id = len(labels)

    with open("./tokens.txt", "w", encoding="utf-8") as f:
        for i, s in enumerate(labels):
            f.write(f"{s} {i}\n")
        f.write(f"<blk> {blank_id}\n")
    print("Saved to tokens.txt")

    model.to_onnx(".")

    meta_data = {
        # +1 accounts for the <blk> symbol appended to tokens.txt.
        "vocab_size": len(labels) + 1,
        "normalize_type": "",
        "subsampling_factor": 4,
        "model_type": "EncDecCTCModel",
        "version": "1",
        "model_author": "https://github.com/salute-developers/GigaAM",
        "license": "https://github.com/salute-developers/GigaAM/blob/main/LICENSE",
        "language": "Russian",
        "is_giga_am": 1,
    }
    add_meta_data(f"./{model_name}.onnx", meta_data)

    quantize_dynamic(
        model_input=f"./{model_name}.onnx",
        model_output="./model.int8.onnx",
        weight_type=QuantType.QUInt8,
    )


if __name__ == "__main__":
    main()
scripts/nemo/GigaAM/run-ctc-v2.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
# Bootstrap pip, then install GigaAM from GitHub and kaldi-native-fbank.
install_gigaam() {
  curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
  python3 get-pip.py

  # Git ref of the GigaAM repository to install from.
  BRANCH='main'
  python3 -m pip install "git+https://github.com/salute-developers/GigaAM.git@${BRANCH}#egg=gigaam"

  python3 -m pip install -qq kaldi-native-fbank
}
| 14 | + | ||
# Download the example wave and the GigaAM license into the current directory.
function download_files() {
  curl -SL -O https://huggingface.co/csukuangfj/tmp-files/resolve/main/GigaAM/example.wav
  # Use the /raw/ URL: the /blob/ URL returns GitHub's HTML page for the
  # file instead of the license text itself.
  curl -SL -O https://github.com/salute-developers/GigaAM/raw/main/LICENSE
}
| 19 | + | ||
# Install the Python dependencies and fetch the example wave and license.
install_gigaam
download_files

# Run the export script, list the files now in this directory, then run
# the CTC test script.
python3 ./export-onnx-ctc-v2.py
ls -lh
python3 ./test-onnx-ctc.py
-
请 注册 或 登录 后发表评论