继续操作前请注册或者登录。
Fangjun Kuang
Committed by GitHub

Set batch size to 1 for more streaming ASR models (#1280)

@@ -7,7 +7,6 @@ on: @@ -7,7 +7,6 @@ on:
7 7
8 workflow_dispatch: 8 workflow_dispatch:
9 9
10 -  
11 concurrency: 10 concurrency:
12 group: mobile-asr-models-${{ github.ref }} 11 group: mobile-asr-models-${{ github.ref }}
13 cancel-in-progress: true 12 cancel-in-progress: true
@@ -16,11 +15,14 @@ jobs: @@ -16,11 +15,14 @@ jobs:
16 mobile-asr-models: 15 mobile-asr-models:
17 if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun' 16 if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun'
18 runs-on: ${{ matrix.os }} 17 runs-on: ${{ matrix.os }}
  18 + name: ${{ matrix.index }}/${{ matrix.total }}
19 strategy: 19 strategy:
20 fail-fast: false 20 fail-fast: false
21 matrix: 21 matrix:
22 os: [ubuntu-latest] 22 os: [ubuntu-latest]
23 python-version: ["3.8"] 23 python-version: ["3.8"]
  24 + total: ["11"]
  25 + index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]
24 26
25 steps: 27 steps:
26 - uses: actions/checkout@v4 28 - uses: actions/checkout@v4
@@ -33,7 +35,20 @@ jobs: @@ -33,7 +35,20 @@ jobs:
33 - name: Install dependencies 35 - name: Install dependencies
34 shell: bash 36 shell: bash
35 run: | 37 run: |
36 - python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0 38 + python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0 jinja2
  39 +
  40 + - name: Generate build script
  41 + shell: bash
  42 + run: |
  43 + cd scripts/mobile-asr-models
  44 +
  45 + total=${{ matrix.total }}
  46 + index=${{ matrix.index }}
  47 +
  48 + ./generate-asr.py --total $total --index $index
  49 + chmod +x run2.sh
  50 + mv run2.sh run.sh
  51 + ls -lh
37 52
38 - name: Run 53 - name: Run
39 shell: bash 54 shell: bash
  1 +name: mobile-kws-models
  2 +
  3 +on:
  4 + push:
  5 + branches:
  6 + - asr-mobile
  7 +
  8 + workflow_dispatch:
  9 +
  10 +concurrency:
  11 + group: mobile-kws-models-${{ github.ref }}
  12 + cancel-in-progress: true
  13 +
  14 +jobs:
  15 + mobile-kws-models:
  16 + if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj' || github.repository_owner == 'csu-fangjun'
  17 + runs-on: ${{ matrix.os }}
  18 + name: ${{ matrix.index }}/${{ matrix.total }}
  19 + strategy:
  20 + fail-fast: false
  21 + matrix:
  22 + os: [ubuntu-latest]
  23 + python-version: ["3.8"]
  24 + total: ["2"]
  25 + index: ["0", "1"]
  26 +
  27 + steps:
  28 + - uses: actions/checkout@v4
  29 +
  30 + - name: Setup Python ${{ matrix.python-version }}
  31 + uses: actions/setup-python@v5
  32 + with:
  33 + python-version: ${{ matrix.python-version }}
  34 +
  35 + - name: Install dependencies
  36 + shell: bash
  37 + run: |
  38 + python3 -m pip install onnxruntime==1.16.3 onnx==1.15.0 jinja2
  39 +
  40 + - name: Generate build script
  41 + shell: bash
  42 + run: |
  43 + cd scripts/mobile-asr-models
  44 +
  45 + total=${{ matrix.total }}
  46 + index=${{ matrix.index }}
  47 +
  48 + ./generate-kws.py --total $total --index $index
  49 + chmod +x run2.sh
  50 + mv run2.sh run.sh
  51 + ls -lh
  52 +
  53 + - name: Run
  54 + shell: bash
  55 + run: |
  56 + cd scripts/mobile-asr-models
  57 + ./run.sh
  58 +
  59 + - name: Release
  60 + uses: svenstaro/upload-release-action@v2
  61 + with:
  62 + file_glob: true
  63 + file: ./kws/*.tar.bz2
  64 + overwrite: true
  65 + repo_name: k2-fsa/sherpa-onnx
  66 + repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
  67 + tag: kws-models
@@ -2,7 +2,6 @@ @@ -2,7 +2,6 @@
2 2
3 import argparse 3 import argparse
4 from dataclasses import dataclass 4 from dataclasses import dataclass
5 -from typing import List, Optional  
6 5
7 import jinja2 6 import jinja2
8 7
@@ -2,7 +2,6 @@ @@ -2,7 +2,6 @@
2 2
3 import argparse 3 import argparse
4 from dataclasses import dataclass 4 from dataclasses import dataclass
5 -from typing import List, Optional  
6 5
7 import jinja2 6 import jinja2
8 7
@@ -2,7 +2,6 @@ @@ -2,7 +2,6 @@
2 2
3 import argparse 3 import argparse
4 from dataclasses import dataclass 4 from dataclasses import dataclass
5 -from typing import List, Optional  
6 5
7 import jinja2 6 import jinja2
8 7
@@ -2,7 +2,6 @@ @@ -2,7 +2,6 @@
2 2
3 import argparse 3 import argparse
4 from dataclasses import dataclass 4 from dataclasses import dataclass
5 -from typing import List, Optional  
6 5
7 import jinja2 6 import jinja2
8 7
@@ -2,7 +2,7 @@ @@ -2,7 +2,7 @@
2 2
3 import argparse 3 import argparse
4 from dataclasses import dataclass 4 from dataclasses import dataclass
5 -from typing import List, Optional 5 +from typing import List
6 6
7 import jinja2 7 import jinja2
8 8
@@ -34,76 +34,99 @@ class SpeakerIdentificationModel: @@ -34,76 +34,99 @@ class SpeakerIdentificationModel:
34 34
35 def get_3dspeaker_models() -> List[SpeakerIdentificationModel]: 35 def get_3dspeaker_models() -> List[SpeakerIdentificationModel]:
36 models = [ 36 models = [
37 - SpeakerIdentificationModel(model_name="3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx"),  
38 - SpeakerIdentificationModel(model_name="3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"),  
39 - SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx"),  
40 - SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"),  
41 - SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx"),  
42 - SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx"),  
43 - SpeakerIdentificationModel(model_name="3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx"), 37 + SpeakerIdentificationModel(
  38 + model_name="3dspeaker_speech_campplus_sv_en_voxceleb_16k.onnx"
  39 + ),
  40 + SpeakerIdentificationModel(
  41 + model_name="3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx"
  42 + ),
  43 + SpeakerIdentificationModel(
  44 + model_name="3dspeaker_speech_eres2net_base_200k_sv_zh-cn_16k-common.onnx"
  45 + ),
  46 + SpeakerIdentificationModel(
  47 + model_name="3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx"
  48 + ),
  49 + SpeakerIdentificationModel(
  50 + model_name="3dspeaker_speech_eres2net_large_sv_zh-cn_3dspeaker_16k.onnx"
  51 + ),
  52 + SpeakerIdentificationModel(
  53 + model_name="3dspeaker_speech_eres2net_sv_en_voxceleb_16k.onnx"
  54 + ),
  55 + SpeakerIdentificationModel(
  56 + model_name="3dspeaker_speech_eres2net_sv_zh-cn_16k-common.onnx"
  57 + ),
44 ] 58 ]
45 59
46 - prefix = '3dspeaker_speech_' 60 + prefix = "3dspeaker_speech_"
47 num = len(prefix) 61 num = len(prefix)
48 for m in models: 62 for m in models:
49 - m.framework = '3dspeaker' 63 + m.framework = "3dspeaker"
50 m.short_name = m.model_name[num:-5] 64 m.short_name = m.model_name[num:-5]
51 - if '_zh-cn_' in m.model_name:  
52 - m.lang = 'zh'  
53 - elif '_en_' in m.model_name:  
54 - m.lang = 'en' 65 + if "_zh-cn_" in m.model_name:
  66 + m.lang = "zh"
  67 + elif "_en_" in m.model_name:
  68 + m.lang = "en"
55 else: 69 else:
56 raise ValueError(m) 70 raise ValueError(m)
57 return models 71 return models
58 72
  73 +
59 def get_wespeaker_models() -> List[SpeakerIdentificationModel]: 74 def get_wespeaker_models() -> List[SpeakerIdentificationModel]:
60 models = [ 75 models = [
61 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++.onnx"), 76 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++.onnx"),
62 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++_LM.onnx"), 77 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_CAM++_LM.onnx"),
63 - SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet152_LM.onnx"),  
64 - SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet221_LM.onnx"),  
65 - SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet293_LM.onnx"), 78 + SpeakerIdentificationModel(
  79 + model_name="wespeaker_en_voxceleb_resnet152_LM.onnx"
  80 + ),
  81 + SpeakerIdentificationModel(
  82 + model_name="wespeaker_en_voxceleb_resnet221_LM.onnx"
  83 + ),
  84 + SpeakerIdentificationModel(
  85 + model_name="wespeaker_en_voxceleb_resnet293_LM.onnx"
  86 + ),
66 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34.onnx"), 87 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34.onnx"),
67 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34_LM.onnx"), 88 SpeakerIdentificationModel(model_name="wespeaker_en_voxceleb_resnet34_LM.onnx"),
68 SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34.onnx"), 89 SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34.onnx"),
69 SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34_LM.onnx"), 90 SpeakerIdentificationModel(model_name="wespeaker_zh_cnceleb_resnet34_LM.onnx"),
70 ] 91 ]
71 92
72 - prefix = 'wespeaker_xx_' 93 + prefix = "wespeaker_xx_"
73 num = len(prefix) 94 num = len(prefix)
74 for m in models: 95 for m in models:
75 - m.framework = 'wespeaker' 96 + m.framework = "wespeaker"
76 m.short_name = m.model_name[num:-5] 97 m.short_name = m.model_name[num:-5]
77 - if '_zh_' in m.model_name:  
78 - m.lang = 'zh'  
79 - elif '_en_' in m.model_name:  
80 - m.lang = 'en' 98 + if "_zh_" in m.model_name:
  99 + m.lang = "zh"
  100 + elif "_en_" in m.model_name:
  101 + m.lang = "en"
81 else: 102 else:
82 raise ValueError(m) 103 raise ValueError(m)
83 return models 104 return models
84 105
  106 +
85 def get_nemo_models() -> List[SpeakerIdentificationModel]: 107 def get_nemo_models() -> List[SpeakerIdentificationModel]:
86 models = [ 108 models = [
87 - SpeakerIdentificationModel(model_name="nemo_en_speakerverification_speakernet.onnx"), 109 + SpeakerIdentificationModel(
  110 + model_name="nemo_en_speakerverification_speakernet.onnx"
  111 + ),
88 SpeakerIdentificationModel(model_name="nemo_en_titanet_large.onnx"), 112 SpeakerIdentificationModel(model_name="nemo_en_titanet_large.onnx"),
89 SpeakerIdentificationModel(model_name="nemo_en_titanet_small.onnx"), 113 SpeakerIdentificationModel(model_name="nemo_en_titanet_small.onnx"),
90 ] 114 ]
91 115
92 - prefix = 'nemo_en_' 116 + prefix = "nemo_en_"
93 num = len(prefix) 117 num = len(prefix)
94 for m in models: 118 for m in models:
95 - m.framework = 'nemo' 119 + m.framework = "nemo"
96 m.short_name = m.model_name[num:-5] 120 m.short_name = m.model_name[num:-5]
97 - if '_zh_' in m.model_name:  
98 - m.lang = 'zh'  
99 - elif '_en_' in m.model_name:  
100 - m.lang = 'en' 121 + if "_zh_" in m.model_name:
  122 + m.lang = "zh"
  123 + elif "_en_" in m.model_name:
  124 + m.lang = "en"
101 else: 125 else:
102 raise ValueError(m) 126 raise ValueError(m)
103 return models 127 return models
104 128
105 129
106 -  
107 def main(): 130 def main():
108 args = get_args() 131 args = get_args()
109 index = args.index 132 index = args.index
@@ -2,7 +2,6 @@ @@ -2,7 +2,6 @@
2 2
3 import argparse 3 import argparse
4 from dataclasses import dataclass 4 from dataclasses import dataclass
5 -from typing import List, Optional  
6 5
7 import jinja2 6 import jinja2
8 7
@@ -16,3 +16,97 @@ https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipform @@ -16,3 +16,97 @@ https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipform
16 16
17 The following [colab notebook](https://colab.research.google.com/drive/1RsVZbsxbPjazeGrNNbZNjXCYbEG2F2DU?usp=sharing) 17 The following [colab notebook](https://colab.research.google.com/drive/1RsVZbsxbPjazeGrNNbZNjXCYbEG2F2DU?usp=sharing)
18 provides examples to use the above two models. 18 provides examples to use the above two models.
  19 +
  20 +**WARNING**: Tested with `onnxruntime==1.16.3 onnx==1.15.0`.
  21 +
  22 +```bash
  23 +pip install onnxruntime==1.16.3 onnx==1.15.0
  24 +```
  25 +
  26 +## More examples
  27 +
  28 +### [sherpa-onnx-streaming-zipformer-korean-2024-06-16](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-korean-2024-06-16-korean)
  29 +
  30 +
  31 +| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
  32 +|---|---|---|
  33 +|Dynamic batch size| 279 MB| 122 MB|
  34 +|Batch size fixed to 1| 264 MB | 107 MB |
  35 +
  36 +### [sherpa-onnx-streaming-zipformer-en-20M-2023-02-17](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-20m-2023-02-17-english)
  37 +
  38 +| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
  39 +|---|---|---|
  40 +|Dynamic batch size| 85 MB| 41 MB|
  41 +|Batch size fixed to 1| 75 MB | 32 MB |
  42 +
  43 +### [sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese)
  44 +
  45 +| | encoder-epoch-20-avg-1-chunk-16-left-128.onnx | encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx|
  46 +|---|---|---|
  47 +|Dynamic batch size| 249 MB| 67 MB|
  48 +|Batch size fixed to 1| 247 MB | 65 MB |
  49 +
  50 +### [icefall-asr-zipformer-streaming-wenetspeech-20230615](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#pkufool-icefall-asr-zipformer-streaming-wenetspeech-20230615-chinese)
  51 +
  52 +| | encoder-epoch-12-avg-4-chunk-16-left-128.onnx | encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx|
  53 +|---|---|---|
  54 +|Dynamic batch size| 250 MB| 68 MB|
  55 +|Batch size fixed to 1| 247 MB | 65 MB |
  56 +
  57 +### [sherpa-onnx-streaming-zipformer-en-2023-06-26](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english)
  58 +
  59 +
  60 +| | encoder-epoch-99-avg-1-chunk-16-left-128.onnx | encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx|
  61 +|---|---|---|
  62 +|Dynamic batch size| 250 MB| 68 MB|
  63 +|Batch size fixed to 1| 247 MB | 65 MB |
  64 +
  65 +### [sherpa-onnx-streaming-zipformer-en-2023-06-21](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-21-english)
  66 +
  67 +| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
  68 +|---|---|---|
  69 +|Dynamic batch size| 338 MB| 180 MB|
  70 +|Batch size fixed to 1| 264 MB | 107 MB |
  71 +
  72 +### [sherpa-onnx-streaming-zipformer-en-2023-02-21](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-02-21-english)
  73 +
  74 +| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
  75 +|---|---|---|
  76 +|Dynamic batch size| 279 MB| 122 MB|
  77 +|Batch size fixed to 1| 264 MB | 107 MB |
  78 +
  79 +### [sherpa-onnx-streaming-zipformer-fr-2023-04-14](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#shaojieli-sherpa-onnx-streaming-zipformer-fr-2023-04-14-french)
  80 +
  81 +| | encoder-epoch-29-avg-9-with-averaged-model.onnx | encoder-epoch-29-avg-9-with-averaged-model.int8.onnx|
  82 +|---|---|---|
  83 +|Dynamic batch size| 279 MB| 121 MB|
  84 +|Batch size fixed to 1| 264 MB | 107 MB |
  85 +
  86 +### [sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16-bilingual-chinese-english)
  87 +
  88 +| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
  89 +|---|---|---|
  90 +|Dynamic batch size| 85 MB| 41 MB|
  91 +|Batch size fixed to 1| 75 MB | 32 MB |
  92 +
  93 +### [sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-zh-14m-2023-02-23-chinese)
  94 +
  95 +| | encoder-epoch-99-avg-1.onnx | encoder-epoch-99-avg-1.int8.onnx|
  96 +|---|---|---|
  97 +|Dynamic batch size| 40 MB| 21 MB|
  98 +|Batch size fixed to 1| 33 MB | 15 MB |
  99 +
  100 +### [sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01](https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html#sherpa-onnx-kws-zipformer-wenetspeech-3-3m-2024-01-01-chinese)
  101 +
  102 +| | encoder-epoch-12-avg-2-chunk-16-left-64.onnx | encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx|
  103 +|---|---|---|
  104 +|Dynamic batch size| 12 MB| 4.6 MB|
  105 +|Batch size fixed to 1| 11 MB | 3.9 MB |
  106 +
  107 +### [sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01](https://k2-fsa.github.io/sherpa/onnx/kws/pretrained_models/index.html#sherpa-onnx-kws-zipformer-gigaspeech-3-3m-2024-01-01-english)
  108 +
  109 +| | encoder-epoch-12-avg-2-chunk-16-left-64.onnx | encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx|
  110 +|---|---|---|
  111 +|Dynamic batch size| 12 MB| 4.6 MB|
  112 +|Batch size fixed to 1| 11 MB | 3.9 MB |
1 #!/usr/bin/env python3 1 #!/usr/bin/env python3
2 import argparse 2 import argparse
3 3
  4 +import onnxruntime
4 from onnxruntime.quantization import QuantType, quantize_dynamic 5 from onnxruntime.quantization import QuantType, quantize_dynamic
5 6
6 7
  8 +def show(filename):
  9 + session_opts = onnxruntime.SessionOptions()
  10 + session_opts.log_severity_level = 3
  11 + sess = onnxruntime.InferenceSession(filename, session_opts)
  12 + for i in sess.get_inputs():
  13 + print(i)
  14 +
  15 + print("-----")
  16 +
  17 + for i in sess.get_outputs():
  18 + print(i)
  19 +
  20 +
7 def get_args(): 21 def get_args():
8 parser = argparse.ArgumentParser() 22 parser = argparse.ArgumentParser()
9 parser.add_argument( 23 parser.add_argument(
@@ -25,6 +39,9 @@ def get_args(): @@ -25,6 +39,9 @@ def get_args():
25 def main(): 39 def main():
26 args = get_args() 40 args = get_args()
27 print(vars(args)) 41 print(vars(args))
  42 + print(f"----------{args.input}----------")
  43 + show(args.input)
  44 + print("------------------------------")
28 45
29 quantize_dynamic( 46 quantize_dynamic(
30 model_input=args.input, 47 model_input=args.input,
  1 +#!/usr/bin/env python3
  2 +
  3 +import argparse
  4 +from dataclasses import dataclass
  5 +import jinja2
  6 +
  7 +
  8 +def get_args():
  9 + parser = argparse.ArgumentParser()
  10 + parser.add_argument(
  11 + "--total",
  12 + type=int,
  13 + default=1,
  14 + help="Number of runners",
  15 + )
  16 + parser.add_argument(
  17 + "--index",
  18 + type=int,
  19 + default=0,
  20 + help="Index of the current runner",
  21 + )
  22 + return parser.parse_args()
  23 +
  24 +
  25 +@dataclass
  26 +class Model:
  27 + # We will download
  28 + # https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/{model_name}.tar.bz2
  29 + model_name: str
  30 +
  31 + cmd: str
  32 +
  33 +
  34 +def get_streaming_zipformer_transducer_models():
  35 + models = [
  36 + Model(
  37 + model_name="sherpa-onnx-streaming-zipformer-korean-2024-06-16",
  38 + cmd="""
  39 + ./run-impl.sh \
  40 + --input $src/encoder-epoch-99-avg-1.onnx \
  41 + --output1 $dst/encoder-epoch-99-avg-1.onnx \
  42 + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx
  43 +
  44 + cp -v $src/bpe.model $dst/ || true
  45 + cp -v $src/tokens.txt $dst/
  46 + cp -av $src/test_wavs $dst/
  47 + cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
  48 + cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/
  49 +
  50 + cat > $dst/notes.md <<EOF
  51 +# Introduction
  52 +This model is converted from
  53 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  54 +and it supports only batch size equal to 1.
  55 +EOF
  56 + """,
  57 + ),
  58 + Model(
  59 + model_name="sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12",
  60 + cmd="""
  61 + ./run-impl.sh \
  62 + --input $src/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
  63 + --output1 $dst/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
  64 + --output2 $dst/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx
  65 +
  66 + cp -v $src/bpe.model $dst/ || true
  67 + cp -v $src/README.md $dst/
  68 + cp -v $src/tokens.txt $dst/
  69 + cp -av $src/test_wavs $dst/
  70 + cp -v $src/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst/
  71 + cp -v $src/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst/
  72 +
  73 + cat > $dst/notes.md <<EOF
  74 +# Introduction
  75 +This model is converted from
  76 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  77 +and it supports only batch size equal to 1.
  78 +EOF
  79 + """,
  80 + ),
  81 + Model(
  82 + model_name="icefall-asr-zipformer-streaming-wenetspeech-20230615",
  83 + cmd="""
  84 + ./run-impl.sh \
  85 + --input $src/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
  86 + --output1 $dst/encoder-epoch-12-avg-4-chunk-16-left-128.onnx \
  87 + --output2 $dst/encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
  88 +
  89 + cp -fv $src/README.md $dst/
  90 + cp -v $src/data/lang_char/tokens.txt $dst/
  91 + cp -av $src/test_wavs $dst/
  92 + cp -v $src/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx $dst/
  93 + cp -v $src/exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx $dst/
  94 +
  95 + cat > $dst/notes.md <<EOF
  96 +# Introduction
  97 +This model is converted from
  98 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  99 +and it supports only batch size equal to 1.
  100 +EOF
  101 + """,
  102 + ),
  103 + Model(
  104 + model_name="sherpa-onnx-streaming-zipformer-en-2023-06-26",
  105 + cmd="""
  106 + ./run-impl.sh \
  107 + --input $src/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
  108 + --output1 $dst/encoder-epoch-99-avg-1-chunk-16-left-128.onnx \
  109 + --output2 $dst/encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
  110 +
  111 + cp -v $src/bpe.model $dst/ || true
  112 + cp -v $src/README.md $dst/
  113 + cp -v $src/tokens.txt $dst/
  114 + cp -av $src/test_wavs $dst/
  115 + cp -v $src/decoder-epoch-99-avg-1-chunk-16-left-128.onnx $dst/
  116 + cp -v $src/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx $dst/
  117 +
  118 + cat > $dst/notes.md <<EOF
  119 +# Introduction
  120 +This model is converted from
  121 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  122 +and it supports only batch size equal to 1.
  123 +EOF
  124 + """,
  125 + ),
  126 + Model(
  127 + model_name="sherpa-onnx-streaming-zipformer-en-2023-06-21",
  128 + cmd="""
  129 + ./run-impl.sh \
  130 + --input $src/encoder-epoch-99-avg-1.onnx \
  131 + --output1 $dst/encoder-epoch-99-avg-1.onnx \
  132 + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx
  133 +
  134 + cp -fv $src/README.md $dst/
  135 + cp -v $src/tokens.txt $dst/
  136 + cp -av $src/test_wavs $dst/
  137 + cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
  138 + cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/
  139 +
  140 + cat > $dst/notes.md <<EOF
  141 +# Introduction
  142 +This model is converted from
  143 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  144 +and it supports only batch size equal to 1.
  145 +EOF
  146 + """,
  147 + ),
  148 + Model(
  149 + model_name="sherpa-onnx-streaming-zipformer-en-2023-02-21",
  150 + cmd="""
  151 + ./run-impl.sh \
  152 + --input $src/encoder-epoch-99-avg-1.onnx \
  153 + --output1 $dst/encoder-epoch-99-avg-1.onnx \
  154 + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx
  155 +
  156 + cp -v $src/bpe.model $dst/ || true
  157 + cp -v $src/README.md $dst/ || true
  158 + cp -v $src/tokens.txt $dst/
  159 + cp -av $src/test_wavs $dst/
  160 + cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
  161 + cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/
  162 +
  163 + cat > $dst/notes.md <<EOF
  164 +# Introduction
  165 +This model is converted from
  166 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  167 +and it supports only batch size equal to 1.
  168 +EOF
  169 + """,
  170 + ),
  171 + Model(
  172 + model_name="sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20",
  173 + cmd="""
  174 + ./run-impl.sh \
  175 + --input $src/encoder-epoch-99-avg-1.onnx \
  176 + --output1 $dst/encoder-epoch-99-avg-1.onnx \
  177 + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx
  178 +
  179 + cp -v $src/README.md $dst/
  180 + cp -v $src/tokens.txt $dst/
  181 + cp -av $src/test_wavs $dst/
  182 + cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
  183 + cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/
  184 +
  185 + cat > $dst/notes.md <<EOF
  186 +# Introduction
  187 +This model is converted from
  188 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  189 +and it supports only batch size equal to 1.
  190 +EOF
  191 + """,
  192 + ),
  193 + Model(
  194 + model_name="sherpa-onnx-streaming-zipformer-fr-2023-04-14",
  195 + cmd="""
  196 + ./run-impl.sh \
  197 + --input $src/encoder-epoch-29-avg-9-with-averaged-model.onnx \
  198 + --output1 $dst/encoder-epoch-29-avg-9-with-averaged-model.onnx \
  199 + --output2 $dst/encoder-epoch-29-avg-9-with-averaged-model.int8.onnx
  200 +
  201 + cp -v $src/bpe.model $dst/ || true
  202 + cp -v $src/README.md $dst/ || true
  203 + cp -v $src/tokens.txt $dst/
  204 + cp -av $src/test_wavs $dst/
  205 + cp -v $src/decoder-epoch-29-avg-9-with-averaged-model.onnx $dst/
  206 + cp -v $src/joiner-epoch-29-avg-9-with-averaged-model.int8.onnx $dst/
  207 +
  208 + cat > $dst/notes.md <<EOF
  209 +# Introduction
  210 +This model is converted from
  211 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  212 +and it supports only batch size equal to 1.
  213 +EOF
  214 + """,
  215 + ),
  216 + Model(
  217 + model_name="sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16",
  218 + cmd="""
  219 + ./run-impl.sh \
  220 + --input $src/encoder-epoch-99-avg-1.onnx \
  221 + --output1 $dst/encoder-epoch-99-avg-1.onnx \
  222 + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx
  223 +
  224 + mkdir $dst/{64,96}
  225 +
  226 + ./run-impl.sh \
  227 + --input $src/64/encoder-epoch-99-avg-1.onnx \
  228 + --output1 $dst/64/encoder-epoch-99-avg-1.onnx \
  229 + --output2 $dst/64/encoder-epoch-99-avg-1.int8.onnx
  230 +
  231 + ./run-impl.sh \
  232 + --input $src/96/encoder-epoch-99-avg-1.onnx \
  233 + --output1 $dst/96/encoder-epoch-99-avg-1.onnx \
  234 + --output2 $dst/96/encoder-epoch-99-avg-1.int8.onnx
  235 +
  236 + cp -v $src/bpe.model $dst/ || true
  237 + cp -v $src/README.md $dst/ || true
  238 + cp -av $src/test_wavs $dst/
  239 +
  240 + cp -v $src/tokens.txt $dst/
  241 + cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
  242 + cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/
  243 +
  244 + cp -v $src/tokens.txt $dst/64/
  245 + cp -v $src/64/decoder-epoch-99-avg-1.onnx $dst/64/
  246 + cp -v $src/64/joiner-epoch-99-avg-1.int8.onnx $dst/64/
  247 +
  248 + cp -v $src/tokens.txt $dst/96/
  249 + cp -v $src/96/decoder-epoch-99-avg-1.onnx $dst/96/
  250 + cp -v $src/96/joiner-epoch-99-avg-1.int8.onnx $dst/96/
  251 +
  252 + cat > $dst/notes.md <<EOF
  253 +# Introduction
  254 +This model is converted from
  255 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  256 +and it supports only batch size equal to 1.
  257 +EOF
  258 + """,
  259 + ),
  260 + Model(
  261 + model_name="sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23",
  262 + cmd="""
  263 + ./run-impl.sh \
  264 + --input $src/encoder-epoch-99-avg-1.onnx \
  265 + --output1 $dst/encoder-epoch-99-avg-1.onnx \
  266 + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx
  267 +
  268 + cp -v $src/bpe.model $dst/ || true
  269 + cp -v $src/README.md $dst/ || true
  270 + cp -v $src/tokens.txt $dst/
  271 + cp -av $src/test_wavs $dst/
  272 + cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
  273 + cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/
  274 +
  275 + cat > $dst/notes.md <<EOF
  276 +# Introduction
  277 +This model is converted from
  278 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  279 +and it supports only batch size equal to 1.
  280 +EOF
  281 + """,
  282 + ),
  283 + Model(
  284 + model_name="sherpa-onnx-streaming-zipformer-en-20M-2023-02-17",
  285 + cmd="""
  286 + ./run-impl.sh \
  287 + --input $src/encoder-epoch-99-avg-1.onnx \
  288 + --output1 $dst/encoder-epoch-99-avg-1.onnx \
  289 + --output2 $dst/encoder-epoch-99-avg-1.int8.onnx
  290 +
  291 + cp -v $src/bpe.model $dst/ || true
  292 + cp -v $src/README.md $dst/ || true
  293 + cp -v $src/tokens.txt $dst/
  294 + cp -av $src/test_wavs $dst/
  295 + cp -v $src/decoder-epoch-99-avg-1.onnx $dst/
  296 + cp -v $src/joiner-epoch-99-avg-1.int8.onnx $dst/
  297 +
  298 + cat > $dst/notes.md <<EOF
  299 +# Introduction
  300 +This model is converted from
  301 +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  302 +and it supports only batch size equal to 1.
  303 +EOF
  304 + """,
  305 + ),
  306 + ]
  307 +
  308 + return models
  309 +
  310 +
  311 +def get_models():
  312 + return get_streaming_zipformer_transducer_models()
  313 +
  314 +
  315 +def main():
  316 + args = get_args()
  317 + index = args.index
  318 + total = args.total
  319 + assert 0 <= index < total, (index, total)
  320 +
  321 + all_model_list = get_models()
  322 +
  323 + num_models = len(all_model_list)
  324 +
  325 + num_per_runner = num_models // total
  326 + if num_per_runner <= 0:
  327 + raise ValueError(f"num_models: {num_models}, num_runners: {total}")
  328 +
  329 + start = index * num_per_runner
  330 + end = start + num_per_runner
  331 +
  332 + remaining = num_models - args.total * num_per_runner
  333 +
  334 + print(f"{index}/{total}: {start}-{end}/{num_models}")
  335 +
  336 + d = dict()
  337 + d["model_list"] = all_model_list[start:end]
  338 + if index < remaining:
  339 + s = args.total * num_per_runner + index
  340 + d["model_list"].append(all_model_list[s])
  341 + print(f"{s}/{num_models}")
  342 +
  343 + filename_list = [
  344 + "./run2.sh",
  345 + ]
  346 + for filename in filename_list:
  347 + environment = jinja2.Environment()
  348 + with open(f"{filename}.in") as f:
  349 + s = f.read()
  350 + template = environment.from_string(s)
  351 +
  352 + s = template.render(**d)
  353 + with open(filename, "w") as f:
  354 + print(s, file=f)
  355 +
  356 +
  357 +if __name__ == "__main__":
  358 + main()
  1 +#!/usr/bin/env python3
  2 +
  3 +import argparse
  4 +from dataclasses import dataclass
  5 +import jinja2
  6 +
  7 +
  8 +def get_args():
  9 + parser = argparse.ArgumentParser()
  10 + parser.add_argument(
  11 + "--total",
  12 + type=int,
  13 + default=1,
  14 + help="Number of runners",
  15 + )
  16 + parser.add_argument(
  17 + "--index",
  18 + type=int,
  19 + default=0,
  20 + help="Index of the current runner",
  21 + )
  22 + return parser.parse_args()
  23 +
  24 +
  25 +@dataclass
  26 +class Model:
  27 + # We will download
  28 + # https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/{model_name}.tar.bz2
  29 + model_name: str
  30 +
  31 + cmd: str
  32 +
  33 +
  34 +def get_kws_models():
  35 + models = [
  36 + Model(
  37 + model_name="sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01",
  38 + cmd="""
  39 + ./run-impl.sh \
  40 + --input $src/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  41 + --output1 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  42 + --output2 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx
  43 +
  44 + cp -v $src/README.md $dst/
  45 + cp -v $src/*.txt $dst/
  46 + cp -av $src/test_wavs $dst/
  47 + cp -v $src/decoder-epoch-12-avg-2-chunk-16-left-64.onnx $dst/
  48 + cp -v $src/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx $dst/
  49 +
  50 + cat > $dst/notes.md <<EOF
  51 +# Introduction
  52 +This model is converted from
  53 +https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$src.tar.bz2
  54 +and it supports only batch size equal to 1.
  55 +EOF
  56 + """,
  57 + ),
  58 + Model(
  59 + model_name="sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01",
  60 + cmd="""
  61 + ./run-impl.sh \
  62 + --input $src/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  63 + --output1 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.onnx \
  64 + --output2 $dst/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx
  65 +
  66 + cp -v $src/bpe.model $dst/
  67 + cp -v $src/README.md $dst/
  68 + cp -v $src/*.txt $dst/
  69 + cp -av $src/test_wavs $dst/
  70 + cp -v $src/decoder-epoch-12-avg-2-chunk-16-left-64.onnx $dst/
  71 + cp -v $src/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx $dst/
  72 +
  73 + cat > $dst/notes.md <<EOF
  74 +# Introduction
  75 +This model is converted from
  76 +https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$src.tar.bz2
  77 +and it supports only batch size equal to 1.
  78 +EOF
  79 + """,
  80 + ),
  81 + ]
  82 + return models
  83 +
  84 +
  85 +def get_models():
  86 + return get_kws_models()
  87 +
  88 +
  89 +def main():
  90 + args = get_args()
  91 + index = args.index
  92 + total = args.total
  93 + assert 0 <= index < total, (index, total)
  94 +
  95 + all_model_list = get_models()
  96 +
  97 + num_models = len(all_model_list)
  98 +
  99 + num_per_runner = num_models // total
  100 + if num_per_runner <= 0:
  101 + raise ValueError(f"num_models: {num_models}, num_runners: {total}")
  102 +
  103 + start = index * num_per_runner
  104 + end = start + num_per_runner
  105 +
  106 + remaining = num_models - args.total * num_per_runner
  107 +
  108 + print(f"{index}/{total}: {start}-{end}/{num_models}")
  109 +
  110 + d = dict()
  111 + d["model_list"] = all_model_list[start:end]
  112 + if index < remaining:
  113 + s = args.total * num_per_runner + index
  114 + d["model_list"].append(all_model_list[s])
  115 + print(f"{s}/{num_models}")
  116 +
  117 + filename_list = [
  118 + "./run2.sh",
  119 + ]
  120 + for filename in filename_list:
  121 + environment = jinja2.Environment()
  122 + with open(f"{filename}.in") as f:
  123 + s = f.read()
  124 + template = environment.from_string(s)
  125 +
  126 + s = template.render(**d)
  127 + with open(filename, "w") as f:
  128 + print(s, file=f)
  129 +
  130 +
  131 +if __name__ == "__main__":
  132 + main()
@@ -11,6 +11,7 @@ input= @@ -11,6 +11,7 @@ input=
11 output1= 11 output1=
12 output2= 12 output2=
13 batch_dim=N 13 batch_dim=N
  14 +
14 source ./parse_options.sh 15 source ./parse_options.sh
15 16
16 if [ -z $input ]; then 17 if [ -z $input ]; then
@@ -35,6 +36,7 @@ echo "output2: $output2" @@ -35,6 +36,7 @@ echo "output2: $output2"
35 36
36 python3 -m onnxruntime.tools.make_dynamic_shape_fixed --dim_param $batch_dim --dim_value 1 $input tmp.fixed.onnx 37 python3 -m onnxruntime.tools.make_dynamic_shape_fixed --dim_param $batch_dim --dim_value 1 $input tmp.fixed.onnx
37 python3 -m onnxruntime.quantization.preprocess --input tmp.fixed.onnx --output $output1 38 python3 -m onnxruntime.quantization.preprocess --input tmp.fixed.onnx --output $output1
  39 +
38 python3 ./dynamic_quantization.py --input $output1 --output $output2 40 python3 ./dynamic_quantization.py --input $output1 --output $output2
39 41
40 ls -lh $input tmp.fixed.onnx $output1 $output2 42 ls -lh $input tmp.fixed.onnx $output1 $output2
  1 +#!/usr/bin/env bash
  2 +set -e
  3 +
  4 +{% for model in model_list %}
  5 +
  6 +src={{ model.model_name }}
  7 +
  8 +if [[ $src == *kws* ]]; then
  9 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$src.tar.bz2
  10 +
  11 +else
  12 + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$src.tar.bz2
  13 +fi
  14 +
  15 +tar xvf $src.tar.bz2
  16 +rm $src.tar.bz2
  17 +
  18 +dst=$src-mobile
  19 +
  20 +mkdir -p $dst
  21 +
  22 +{{ model.cmd }}
  23 +
  24 +echo "---$src---"
  25 +ls -lh $src
  26 +echo "---$dst---"
  27 +ls -lh $dst
  28 +rm -rf $src
  29 +
  30 +tar cjfv $dst.tar.bz2 $dst
  31 +
  32 +if [[ $src == *kws* ]]; then
  33 + mkdir -p ../../kws
  34 + mv *.tar.bz2 ../../kws/
  35 +else
  36 + mv *.tar.bz2 ../../
  37 +fi
  38 +rm -rf $dst
  39 +
  40 +{% endfor %}