Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2025-04-21 18:57:41 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-04-21 18:57:41 +0800
Commit
7cbb1bc4330d248916878f5eb899cfd6961e74dd
7cbb1bc4
1 parent
6cabaa11
Upload more onnx ASR models (#2141)
显示空白字符变更
内嵌
并排对比
正在显示
3 个修改的文件
包含
338 行增加
和
0 行删除
.github/workflows/upload-models.yaml
scripts/apk/generate-vad-asr-apk-script.py
sherpa-onnx/kotlin-api/OfflineRecognizer.kt
.github/workflows/upload-models.yaml
0 → 100644
查看文件 @
7cbb1bc
# Workflow: package pre-trained ASR models, publish each model directory to its
# Hugging Face repository, and attach the .tar.bz2 archives to the
# `asr-models` release of k2-fsa/sherpa-onnx.
name: upload-models

on:
  # Runs on pushes to the dedicated `upload-models` branch or when started manually.
  push:
    branches:
      - upload-models

  workflow_dispatch:

# A newer run for the same ref cancels the one still in progress.
concurrency:
  group: upload-models-${{ github.ref }}
  cancel-in-progress: true

jobs:
  upload-models:
    # Only run for these two repository owners.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: upload models
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # NOTE(review): no step in this workflow references matrix.python-version.
        python-version: ["3.8"]

    steps:
      - uses: actions/checkout@v4

      # Builds two archives (float and int8 encoder/joiner) from the same
      # Hugging Face checkout and moves everything to the workspace root so the
      # later steps can find the directories and tarballs.
      - name: Vietnamese (zipformer)
        shell: bash
        run: |
          rm -rf models
          mkdir models
          cd models
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/zzasdf/viet_iter3_pseudo_label
          which are trained on about 70k hours of data.
          EOF

          git lfs install
          git clone https://huggingface.co/csukuangfj/viet_iter3_pseudo_label hf
          ls -lh

          d=sherpa-onnx-zipformer-vi-2025-04-20
          mkdir -p "$d"
          cp -v hf/exp/encoder-epoch-12-avg-8.onnx "$d/"
          cp -v hf/exp/decoder-epoch-12-avg-8.onnx "$d/"
          cp -v hf/exp/joiner-epoch-12-avg-8.onnx "$d/"
          cp -v hf/data/Vietnam_bpe_2000_new/bpe.model "$d/"
          cp -v hf/data/Vietnam_bpe_2000_new/tokens.txt "$d/"
          cp -av hf/test_wavs "$d"
          cp -v README.md "$d"
          tar cjfv "$d.tar.bz2" "$d"

          # The int8 variant uses int8 encoder/joiner and the same decoder file.
          d=sherpa-onnx-zipformer-vi-int8-2025-04-20
          mkdir -p "$d"
          cp -v hf/exp/encoder-epoch-12-avg-8.int8.onnx "$d/"
          cp -v hf/exp/decoder-epoch-12-avg-8.onnx "$d/"
          cp -v hf/exp/joiner-epoch-12-avg-8.int8.onnx "$d/"
          cp -v hf/data/Vietnam_bpe_2000_new/bpe.model "$d/"
          cp -v hf/data/Vietnam_bpe_2000_new/tokens.txt "$d/"
          cp -av hf/test_wavs "$d"
          cp -v README.md "$d"
          tar cjfv "$d.tar.bz2" "$d"

          rm -rf hf
          ls -lh
          cd ..
          mv models/* .

      # Pushes each model directory produced above to the Hugging Face
      # repository of the same name; the whole command is retried on failure.
      - name: Publish to huggingface (Vietnamese zipformer)
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-zipformer-vi-2025-04-20
              sherpa-onnx-zipformer-vi-int8-2025-04-20
            )

            # Loop-invariant settings, exported once before iterating.
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            # Quote the array expansion so each element stays a single word.
            for d in "${models[@]}"; do
              rm -rf huggingface
              git clone "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" huggingface
              cp -av "$d"/* huggingface

              pushd huggingface
              git lfs track "*.onnx"
              git lfs track "bpe.model"
              git lfs track "*.wav"
              git status
              git add .
              git commit -m "add models"
              git push "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" main
              popd
            done

      # Disabled via `if: false`. Same packaging scheme as the Vietnamese step,
      # with test wavs downloaded separately and renamed to 0.wav / 1.wav.
      - name: vosk-model-ru (zipformer)
        if: false
        shell: bash
        run: |
          rm -rf models
          mkdir models
          cd models
          cat >README.md <<EOF
          # Introduction
          Models in this directory are from
          https://huggingface.co/alphacep/vosk-model-ru/tree/main
          EOF

          git lfs install
          git clone https://huggingface.co/alphacep/vosk-model-ru hf
          ls -lh

          mkdir test_wavs
          pushd test_wavs
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/russian-i-love-you.wav
          curl -SL -O https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition/resolve/main/test_wavs/russian/test.wav
          mv russian-i-love-you.wav 0.wav
          mv test.wav 1.wav
          popd

          d=sherpa-onnx-zipformer-ru-2025-04-20
          mkdir "$d"
          cp -v hf/am-onnx/encoder.onnx "$d"
          cp -v hf/am-onnx/decoder.onnx "$d"
          cp -v hf/am-onnx/joiner.onnx "$d"
          cp -v hf/lang/bpe.model "$d"
          cp -v hf/lang/tokens.txt "$d"
          cp -av test_wavs "$d/"
          cp -v README.md "$d"
          tar cjfv "$d.tar.bz2" "$d"

          d=sherpa-onnx-zipformer-ru-int8-2025-04-20
          mkdir "$d"
          cp -v hf/am-onnx/encoder.int8.onnx "$d"
          cp -v hf/am-onnx/decoder.onnx "$d"
          cp -v hf/am-onnx/joiner.int8.onnx "$d"
          cp -v hf/lang/bpe.model "$d"
          cp -v hf/lang/tokens.txt "$d"
          cp -av test_wavs "$d"
          cp -v README.md "$d"
          tar cjfv "$d.tar.bz2" "$d"

          rm -rf hf
          ls -lh
          cd ..
          mv models/* .

      # Disabled via `if: false`, matching the packaging step above.
      - name: Publish to huggingface (Russian zipformer)
        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            models=(
              sherpa-onnx-zipformer-ru-2025-04-20
              sherpa-onnx-zipformer-ru-int8-2025-04-20
            )

            # Loop-invariant settings, exported once before iterating.
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            # Quote the array expansion so each element stays a single word.
            for d in "${models[@]}"; do
              rm -rf huggingface
              git clone "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" huggingface
              cp -av "$d"/* huggingface

              pushd huggingface
              git lfs track "*.onnx"
              git lfs track "bpe.model"
              git lfs track "*.wav"
              git status
              git add .
              git commit -m "add models"
              git push "https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d" main
              popd
            done

      # Uploads every tarball in the workspace root to the `asr-models` release
      # tag, overwriting assets that already exist under the same name.
      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: asr-models
...
...
scripts/apk/generate-vad-asr-apk-script.py
查看文件 @
7cbb1bc
...
...
@@ -459,6 +459,79 @@ def get_models():
popd
"""
,
),
Model
(
model_name
=
"sherpa-onnx-zipformer-vi-int8-2025-04-20"
,
idx
=
26
,
lang
=
"vi"
,
lang2
=
"Vietnamese"
,
short_name
=
"zipformer"
,
cmd
=
"""
pushd $model_name
rm -rfv test_wavs
rm -fv bpe.model
ls -lh
popd
"""
,
),
Model
(
model_name
=
"sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19"
,
idx
=
27
,
lang
=
"ru"
,
lang2
=
"Russian"
,
short_name
=
"nemo_ctc_giga_am_v2"
,
cmd
=
"""
pushd $model_name
rm -rfv test_wavs
rm -fv *.sh
rm -fv *.py
ls -lh
popd
"""
,
),
Model
(
model_name
=
"sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19"
,
idx
=
28
,
lang
=
"ru"
,
lang2
=
"Russian"
,
short_name
=
"nemo_transducer_giga_am"
,
cmd
=
"""
pushd $model_name
rm -rfv test_wavs
rm -fv *.sh
rm -fv *.py
ls -lh
popd
"""
,
),
Model
(
model_name
=
"sherpa-onnx-zipformer-ru-int8-2025-04-20"
,
idx
=
29
,
lang
=
"ru"
,
lang2
=
"Russian"
,
short_name
=
"v2_zipformer"
,
cmd
=
"""
pushd $model_name
rm -rfv test_wavs
rm -fv bpe.model
ls -lh
popd
"""
,
),
]
return
models
...
...
sherpa-onnx/kotlin-api/OfflineRecognizer.kt
查看文件 @
7cbb1bc
...
...
@@ -496,6 +496,55 @@ fun getOfflineModelConfig(type: Int): OfflineModelConfig? {
tokens = "$modelDir/tokens.txt",
)
}
26 -> {
val modelDir = "sherpa-onnx-zipformer-vi-int8-2025-04-20"
return OfflineModelConfig(
transducer = OfflineTransducerModelConfig(
encoder = "$modelDir/encoder-epoch-12-avg-8.int8.onnx",
decoder = "$modelDir/decoder-epoch-12-avg-8.onnx",
joiner = "$modelDir/joiner-epoch-12-avg-8.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "transducer",
)
}
27 -> {
val modelDir = "sherpa-onnx-nemo-ctc-giga-am-v2-russian-2025-04-19"
return OfflineModelConfig(
nemo = OfflineNemoEncDecCtcModelConfig(
model = "$modelDir/model.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
)
}
28 -> {
val modelDir = "sherpa-onnx-nemo-transducer-giga-am-v2-russian-2025-04-19"
return OfflineModelConfig(
transducer = OfflineTransducerModelConfig(
encoder = "$modelDir/encoder.int8.onnx",
decoder = "$modelDir/decoder.onnx",
joiner = "$modelDir/joiner.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "nemo_transducer",
)
}
29 -> {
val modelDir = "sherpa-onnx-zipformer-ru-int8-2025-04-20"
return OfflineModelConfig(
transducer = OfflineTransducerModelConfig(
encoder = "$modelDir/encoder.int8.onnx",
decoder = "$modelDir/decoder.onnx",
joiner = "$modelDir/joiner.int8.onnx",
),
tokens = "$modelDir/tokens.txt",
modelType = "transducer",
)
}
}
return null
}
...
...
请
注册
或
登录
后发表评论