Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-10-17 11:58:14 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-10-17 11:58:14 +0800
Commit
620597f50192b92c49f733bec3666699e748b108
620597f5
1 parent
471cbd83
Support
https://huggingface.co/Revai/reverb-diarization-v1
(#1437)
显示空白字符变更
内嵌
并排对比
正在显示
9 个修改的文件
包含
276 行增加
和
12 行删除
.github/workflows/apk-speaker-diarization.yaml
.github/workflows/export-revai-segmentation-to-onnx.yaml
scripts/apk/build-apk-speaker-diarization.sh → scripts/apk/build-apk-speaker-diarization.sh.in
scripts/apk/generate-speaker-diarization-apk-script.py
scripts/pyannote/segmentation/export-onnx.py
scripts/pyannote/segmentation/preprocess.sh
scripts/pyannote/segmentation/run-revai.sh
scripts/pyannote/segmentation/speaker-diarization-torch.py
scripts/pyannote/segmentation/vad-onnx.py
.github/workflows/apk-speaker-diarization.yaml
查看文件 @
620597f
...
...
@@ -4,7 +4,6 @@ on:
push
:
branches
:
-
apk
-
android-demo-speaker-diarization-2
workflow_dispatch
:
...
...
@@ -76,6 +75,11 @@ jobs:
run
:
|
cd scripts/apk
total=${{ matrix.total }}
index=${{ matrix.index }}
python3 ./generate-speaker-diarization-apk-script.py --total $total --index $index
chmod +x build-apk-speaker-diarization.sh
mv -v ./build-apk-speaker-diarization.sh ../..
...
...
.github/workflows/export-revai-segmentation-to-onnx.yaml
0 → 100644
查看文件 @
620597f
# Manually triggered workflow that converts the Revai reverb-diarization-v1
# segmentation model to ONNX, uploads the result as a release asset of
# k2-fsa/sherpa-onnx, and pushes it to a Hugging Face model repository.
name: export-revai-segmentation-to-onnx

on:
  workflow_dispatch:

# Only one run per ref at a time; a newer run cancels the one in progress.
concurrency:
  group: export-revai-segmentation-to-onnx-${{ github.ref }}
  cancel-in-progress: true

jobs:
  export-revai-segmentation-to-onnx:
    # Skip in forks other than the two listed owners.
    if: github.repository_owner == 'k2-fsa' || github.repository_owner == 'csukuangfj'
    name: export revai segmentation models to ONNX
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [macos-latest]
        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install pyannote
        shell: bash
        run: |
          pip install pyannote.audio onnx==1.15.0 onnxruntime==1.16.3

      - name: Run
        shell: bash
        run: |
          d=sherpa-onnx-reverb-diarization-v1
          src=$PWD/$d
          mkdir -p $src

          pushd scripts/pyannote/segmentation
          ./run-revai.sh

          # Collect the exported models, docs, license and scripts into $src.
          cp ./*.onnx $src/
          cp ./README.md $src/
          cp ./LICENSE $src/
          cp ./run-revai.sh $src/run.sh
          cp ./*.py $src/
          popd

          ls -lh $d
          tar cjfv $d.tar.bz2 $d

      - name: Release
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          file: ./*.tar.bz2
          overwrite: true
          repo_name: k2-fsa/sherpa-onnx
          repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
          tag: speaker-segmentation-models

      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            d=sherpa-onnx-reverb-diarization-v1

            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            # This command may be retried in the same workspace. Remove any
            # clone left behind by a previous failed attempt; otherwise
            # "git clone" refuses to write into the existing directory and
            # every remaining attempt fails the same way.
            rm -rf huggingface

            git clone https://huggingface.co/csukuangfj/$d huggingface
            cp -v $d/* ./huggingface

            cd huggingface
            git lfs track "*.onnx"
            git status
            git add .
            git status
            git commit -m "add models"
            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/$d main
...
...
scripts/apk/build-apk-speaker-diarization.sh → scripts/apk/build-apk-speaker-diarization.sh
.in
查看文件 @
620597f
...
...
@@ -31,15 +31,24 @@ log "====================x86===================="
mkdir -p apks
{%
for
model
in
model_list %
}
pushd
./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
tar xvf sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
rm sherpa-onnx-pyannote-segmentation-3-0.tar.bz2
mv sherpa-onnx-pyannote-segmentation-3-0/model.onnx segmentation.onnx
rm -rf sherpa-onnx-pyannote-segmentation-3-0
ls -lh
model_name
={{
model.model_name
}}
short_name
={{
model.short_name
}}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/
$model_name
.tar.bz2
tar xvf
$model_name
.tar.bz2
rm
$model_name
.tar.bz2
mv
$model_name
/model.onnx segmentation.onnx
rm -rf
$model_name
if
[
! -f 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
]
;
then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
fi
echo
"pwd:
$PWD
"
ls -lh
...
...
@@ -65,9 +74,13 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
./gradlew build
popd
mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-
${
SHERPA_ONNX_VERSION
}
-
$arch
-speaker-diarization-
pyannote_audio
-3dspeaker.apk
mv android/SherpaOnnxSpeakerDiarization/app/build/outputs/apk/debug/app-debug.apk ./apks/sherpa-onnx-
${
SHERPA_ONNX_VERSION
}
-
$arch
-speaker-diarization-
$short_name
-3dspeaker.apk
ls -lh apks
rm -v ./android/SherpaOnnxSpeakerDiarization/app/src/main/jniLibs/
$arch
/
*
.so
done
rm -rf ./android/SherpaOnnxSpeakerDiarization/app/src/main/assets/segmentation.onnx
{%
endfor %
}
ls -lh apks
...
...
scripts/apk/generate-speaker-diarization-apk-script.py
0 → 100755
查看文件 @
620597f
#!/usr/bin/env python3
import
argparse
from
dataclasses
import
dataclass
from
typing
import
List
import
jinja2
def get_args():
    """Parse the command-line options of this script.

    Options:
      --total  Number of runners the work is split across (int, default 1).
      --index  Index of the current runner (int, default 0).

    Returns:
      The argparse.Namespace produced by parse_args(), with attributes
      ``total`` and ``index``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--total",
        type=int,
        default=1,
        help="Number of runners",
    )
    parser.add_argument(
        "--index",
        type=int,
        default=0,
        help="Index of the current runner",
    )
    return parser.parse_args()
@dataclass
class SpeakerSegmentationModel:
    """Names describing one speaker segmentation model."""

    # Used by the build-script template as the archive/directory name
    # (it downloads "<model_name>.tar.bz2").
    model_name: str
    # Used by the build-script template inside the generated APK filename.
    short_name: str = ""
def get_models() -> List[SpeakerSegmentationModel]:
    """Return the speaker segmentation models to build APKs for.

    The list order matters: main() slices this list by position to decide
    which models each runner handles.
    """
    models = [
        SpeakerSegmentationModel(
            model_name="sherpa-onnx-pyannote-segmentation-3-0",
            short_name="pyannote_audio",
        ),
        SpeakerSegmentationModel(
            model_name="sherpa-onnx-reverb-diarization-v1",
            short_name="revai_v1",
        ),
    ]
    return models
def main():
    """Render ./build-apk-speaker-diarization.sh for the current runner.

    The models from get_models() are split evenly across ``--total`` runners;
    runner ``--index`` gets the slice [index * n, index * n + n) where
    n = num_models // total. Models left over after the even split are handed
    out one each to the runners with the lowest indices.

    The selected models are passed as ``model_list`` to the Jinja2 template
    read from "<filename>.in", and the rendered text is written to
    "<filename>".

    Raises:
      AssertionError: if not 0 <= index < total.
      ValueError: if there are fewer models than runners.
    """
    args = get_args()
    index = args.index
    total = args.total
    assert 0 <= index < total, (index, total)

    all_model_list = get_models()

    num_models = len(all_model_list)

    num_per_runner = num_models // total
    if num_per_runner <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    start = index * num_per_runner
    end = start + num_per_runner

    # Number of models that do not fit into the even split.
    remaining = num_models - args.total * num_per_runner

    print(f"{index}/{total}: {start}-{end}/{num_models}")

    d = dict()
    d["model_list"] = all_model_list[start:end]
    if index < remaining:
        # This runner also takes one of the leftover models.
        extra_index = args.total * num_per_runner + index
        d["model_list"].append(all_model_list[extra_index])
        print(f"{extra_index}/{num_models}")

    filename_list = [
        "./build-apk-speaker-diarization.sh",
    ]
    for filename in filename_list:
        environment = jinja2.Environment()
        with open(f"{filename}.in") as template_file:
            template_text = template_file.read()
        template = environment.from_string(template_text)

        rendered = template.render(**d)
        with open(filename, "w") as output_file:
            # print() appends a trailing newline to the rendered script.
            print(rendered, file=output_file)


if __name__ == "__main__":
    main()
...
...
scripts/pyannote/segmentation/export-onnx.py
查看文件 @
620597f
#!/usr/bin/env python3
# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang)
import
os
from
typing
import
Any
,
Dict
import
onnx
...
...
@@ -35,6 +37,8 @@ def add_meta_data(filename: str, meta_data: Dict[str, Any]):
def
main
():
# You can download ./pytorch_model.bin from
# https://hf-mirror.com/csukuangfj/pyannote-models/tree/main/segmentation-3.0
# or from
# https://huggingface.co/Revai/reverb-diarization-v1/tree/main
pt_filename
=
"./pytorch_model.bin"
model
=
Model
.
from_pretrained
(
pt_filename
)
model
.
eval
()
...
...
@@ -94,6 +98,22 @@ def main():
receptive_field_size
=
int
(
model
.
receptive_field
.
duration
*
16000
)
receptive_field_shift
=
int
(
model
.
receptive_field
.
step
*
16000
)
is_revai
=
os
.
getenv
(
"SHERPA_ONNX_IS_REVAI"
,
""
)
if
is_revai
==
""
:
url_1
=
"https://huggingface.co/pyannote/segmentation-3.0"
url_2
=
"https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0"
license_url
=
(
"https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE"
)
model_author
=
"pyannote-audio"
else
:
url_1
=
"https://huggingface.co/Revai/reverb-diarization-v1"
url_2
=
"https://huggingface.co/csukuangfj/sherpa-onnx-reverb-diarization-v1"
license_url
=
(
"https://huggingface.co/Revai/reverb-diarization-v1/blob/main/LICENSE"
)
model_author
=
"Revai"
meta_data
=
{
"num_speakers"
:
len
(
model
.
specifications
.
classes
),
"powerset_max_classes"
:
model
.
specifications
.
powerset_max_classes
,
...
...
@@ -104,11 +124,11 @@ def main():
"receptive_field_shift"
:
receptive_field_shift
,
"model_type"
:
"pyannote-segmentation-3.0"
,
"version"
:
"1"
,
"model_author"
:
"pyannote"
,
"model_author"
:
model_author
,
"maintainer"
:
"k2-fsa"
,
"url_1"
:
"https://huggingface.co/pyannote/segmentation-3.0"
,
"url_2"
:
"https://huggingface.co/csukuangfj/pyannote-models/tree/main/segmentation-3.0"
,
"license"
:
"https://huggingface.co/pyannote/segmentation-3.0/blob/main/LICENSE"
,
"url_1"
:
url_1
,
"url_2"
:
url_2
,
"license"
:
license_url
,
}
add_meta_data
(
filename
=
filename
,
meta_data
=
meta_data
)
...
...
scripts/pyannote/segmentation/preprocess.sh
查看文件 @
620597f
#!/usr/bin/env bash
# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang)
python3 -m onnxruntime.quantization.preprocess --input model.onnx --output tmp.preprocessed.onnx
...
...
scripts/pyannote/segmentation/run-revai.sh
0 → 100755
查看文件 @
620597f
#!/usr/bin/env bash
# Copyright      2024  Xiaomi Corp.        (authors: Fangjun Kuang)

# Exported so that the scripts started below inherit it. export-onnx.py reads
# this variable with os.getenv() and, when it is non-empty, writes the Revai
# URLs/author into the model metadata instead of the pyannote ones.
export SHERPA_ONNX_IS_REVAI=1

# -e: stop at the first failing command; -x: trace each command.
set -ex
# Install the Python packages named below with pip (no version pins here).
# Arguments: none
function install_pyannote() {
  pip install pyannote.audio onnx onnxruntime
}
# Download the Revai PyTorch checkpoint and a test wave file into the current
# directory, keeping the remote file names (curl -O).
# Arguments: none
# Returns:   non-zero if a download fails, including on an HTTP error status.
function download_test_files() {
  # -f makes curl exit non-zero on HTTP errors (e.g. 404). Without it the
  # error page would be saved under the model's file name and curl would
  # still return 0, so the failure would only show up later.
  curl -fSL -O https://huggingface.co/Revai/reverb-diarization-v1/resolve/main/pytorch_model.bin
  curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/lei-jun-test.wav
}
# Main flow: install dependencies, fetch inputs, export the model, then run
# the torch and ONNX scripts on the test wave.
install_pyannote

download_test_files

./export-onnx.py
./preprocess.sh

echo "----------torch----------"
./vad-torch.py

echo "----------onnx model.onnx----------"
./vad-onnx.py --model ./model.onnx --wav ./lei-jun-test.wav

echo "----------onnx model.int8.onnx----------"
./vad-onnx.py --model ./model.int8.onnx --wav ./lei-jun-test.wav

# -f: fail on an HTTP error instead of saving the error page as ./LICENSE.
curl -fSL -O https://huggingface.co/Revai/reverb-diarization-v1/resolve/main/LICENSE

# Write the README that accompanies the exported models.
cat >README.md << EOF
# Introduction
Models in this file are converted from
https://huggingface.co/Revai/reverb-diarization-v1/tree/main
Note that it is accessible under a non-commercial license.
Please see ./LICENSE for details.
See also
https://www.rev.com/blog/speech-to-text-technology/introducing-reverb-open-source-asr-diarization
EOF
...
...
scripts/pyannote/segmentation/speaker-diarization-torch.py
查看文件 @
620597f
#!/usr/bin/env python3
# Copyright 2024 Xiaomi Corp. (authors: Fangjun Kuang)
"""
Please refer to
...
...
scripts/pyannote/segmentation/vad-onnx.py
查看文件 @
620597f
...
...
@@ -216,6 +216,8 @@ def main():
is_active
=
classification
[
0
]
>
onset
start
=
None
if
is_active
:
start
=
0
scale
=
m
.
receptive_field_shift
/
m
.
sample_rate
scale_offset
=
m
.
receptive_field_size
/
m
.
sample_rate
*
0.5
...
...
请
注册
或
登录
后发表评论