Fangjun Kuang
Committed by GitHub

Add CI to build HAPs for HarmonyOS (#1578)

# Build signed HarmonyOS HAP packages for the VAD + ASR demo and upload them
# to the csukuangfj/sherpa-onnx-harmony-os repository on Hugging Face.
#
# The job is sharded through the matrix: every shard passes its own
# --index together with --total to scripts/hap/generate-vad-asr-hap-script.py,
# which produces the build-hap-vad-asr.sh executed by that shard.
name: hap-vad-asr

on:
  push:
    branches:
      - hap
      - hap-ci

  workflow_dispatch:

# A newer run on the same ref cancels the one still in progress.
concurrency:
  group: hap-vad-asr-${{ github.ref }}
  cancel-in-progress: true

permissions:
  contents: write

jobs:
  hap_vad_asr:
    # Only run in the upstream repositories, not in arbitrary forks.
    if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
    runs-on: ${{ matrix.os }}
    name: Haps for vad asr ${{ matrix.index }}/${{ matrix.total }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        total: ["10"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # https://github.com/actions/setup-java
      - uses: actions/setup-java@v4
        with:
          distribution: 'temurin' # See 'Supported distributions' for available options
          java-version: '17' # it requires jdk 17 to sign the hap

      - name: Show java version
        shell: bash
        run: |
          which java
          java --version

      # The toolchain archive is large; cache the unpacked directory and only
      # download it on a cache miss.
      - name: cache-toolchain
        id: cache-toolchain-ohos
        uses: actions/cache@v4
        with:
          path: command-line-tools
          key: commandline-tools-linux-x64-5.0.5.200.zip

      - name: Download toolchain
        if: steps.cache-toolchain-ohos.outputs.cache-hit != 'true'
        shell: bash
        run: |
          curl -SL -O https://huggingface.co/csukuangfj/harmonyos-commandline-tools/resolve/main/commandline-tools-linux-x64-5.0.5.200.zip
          unzip commandline-tools-linux-x64-5.0.5.200.zip
          rm commandline-tools-linux-x64-5.0.5.200.zip

      - name: Set environment variable
        shell: bash
        run: |
          # Entries appended to GITHUB_PATH become visible in the following
          # steps; the two commands below still report the cmake that is
          # already on PATH in this step.
          echo "$GITHUB_WORKSPACE/command-line-tools/sdk/default/openharmony/native/build-tools/cmake/bin" >> "$GITHUB_PATH"
          which cmake
          cmake --version

      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2

      - name: Generate build script
        shell: bash
        run: |
          cd scripts/hap

          total=${{ matrix.total }}
          index=${{ matrix.index }}

          ./generate-vad-asr-hap-script.py --total $total --index $index

          ls -lh

          chmod +x build-hap-vad-asr.sh
          # The generated script is run from the repository root.
          mv -v ./build-hap-vad-asr.sh ../..

      # Materialize the signing certificate, profile and keystore under /tmp.
      # The checksums and sizes are printed to help verify that the secrets
      # were decoded correctly.
      - name: Generate secrets
        shell: bash
        run: |
          echo "${{ secrets.HAP_SHERPA_ONNX_CER }}" > /tmp/sherpa_onnx.cer
          shasum -a 256 /tmp/sherpa_onnx.cer
          ls -lh /tmp/sherpa_onnx.cer

          # macos
          # base64 -i sherpa_onnx_profileRelease.p7b -o sherpa_onnx_profileRelease.p7b.base64
          #
          # linux
          # base64 -w 0 sherpa_onnx_profileRelease.p7b > sherpa_onnx_profileRelease.p7b.base64
          #
          # cat sherpa_onnx_profileRelease.p7b.base64 | base64 --decode > sherpa_onnx_profileRelease.p7b
          #
          echo "${{ secrets.HAP_SHERPA_ONNX_PROFILE }}" | base64 --decode > /tmp/sherpa_onnx_profileRelease.p7b

          echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" > ./sherpa_onnx_ohos_key.p12.base64
          echo "${{ secrets.HAP_SHERPA_ONNX_KEY_STORE }}" | base64 --decode > /tmp/sherpa_onnx_ohos_key.p12

          ls -l /tmp/sherpa_onnx_profileRelease.p7b
          ls -l /tmp/sherpa_onnx_ohos_key.p12

          ls -lh ./sherpa_onnx_ohos_key.p12.base64
          shasum -a 256 ./sherpa_onnx_ohos_key.p12.base64
          wc ./sherpa_onnx_ohos_key.p12.base64
          rm ./sherpa_onnx_ohos_key.p12.base64

          shasum -a 256 /tmp/sherpa_onnx_profileRelease.p7b
          shasum -a 256 /tmp/sherpa_onnx_ohos_key.p12

      - name: build HAP
        env:
          HAP_KEY_ALIAS: ${{ secrets.HAP_KEY_ALIAS }}
          HAP_KEY_PWD: ${{ secrets.HAP_KEY_PWD }}
          HAP_KEY_STORE_PWD: ${{ secrets.HAP_KEY_STORE_PWD }}
        shell: bash
        run: |
          export COMMANDLINE_TOOLS_DIR=$GITHUB_WORKSPACE/command-line-tools

          # remove secrets
          #
          # Registered as an EXIT trap so that the signing material is deleted
          # even when the build script fails and the step aborts early.
          trap 'rm -f /tmp/sherpa_onnx.cer /tmp/sherpa_onnx_profileRelease.p7b /tmp/sherpa_onnx_ohos_key.p12' EXIT

          ./build-hap-vad-asr.sh

      - name: Display HAPs
        shell: bash
        run: |
          ls -lh ./haps/
          du -h -d1 .

      # Several matrix shards push to the same repository, so a push can be
      # rejected when another shard pushed first; every retry starts again
      # from a fresh clone.
      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            # Do not download the LFS objects that are already in the repository.
            export GIT_LFS_SKIP_SMUDGE=1
            export GIT_CLONE_PROTECTION_ACTIVE=false

            SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
            echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION"

            git clone https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os huggingface
            cd huggingface
            du -h -d1 .
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            d=hap/vad-asr/$SHERPA_ONNX_VERSION
            mkdir -p "$d"
            cp -v ../haps/*.hap "$d/"

            git status
            git lfs track "*.hap"
            git add .

            # `git commit` exits non-zero when nothing is staged, e.g. when a
            # re-run produces HAPs that are already in the repository. Skip the
            # commit in that case instead of failing every retry attempt.
            git diff --cached --quiet || git commit -m "add more HAPs"

            git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-harmony-os main
... ...
{
"meta": {
"stableOrder": true
},
"lockfileVersion": 3,
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
"specifiers": {
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
"sherpa_onnx@1.10.32": "sherpa_onnx@1.10.32"
},
"packages": {
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
"name": "libsherpa_onnx.so",
"version": "1.0.0",
"resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
"registryType": "local"
},
"sherpa_onnx@1.10.32": {
"name": "sherpa_onnx",
"version": "1.10.32",
"integrity": "sha512-yHYmWoeqhrunOqGr9gxPJJH/8+rdwcKFOW6onYByVObQVpbqypslg301IjGm9xpnc5bJEkO3S9sra2zQTpPA/w==",
"resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.32.har",
"registryType": "ohpm",
"dependencies": {
"libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
}
}
}
}
\ No newline at end of file
... ...
... ... @@ -5,6 +5,9 @@
"main": "",
"author": "",
"license": "",
"dependencies": {}
"dependencies": {
// please see https://ohpm.openharmony.cn/#/cn/detail/sherpa_onnx
"sherpa_onnx": "1.10.32",
}
}
... ...
... ... @@ -4,7 +4,7 @@
import { OfflineModelConfig } from 'sherpa_onnx';
export function getOfflineModelConfig(type: number): OfflineModelConfig {
const c = new OfflineModelConfig();
const c: OfflineModelConfig = new OfflineModelConfig();
switch (type) {
case 0: {
const modelDir = 'sherpa-onnx-paraformer-zh-2023-09-14'
... ...
... ... @@ -2,8 +2,11 @@ import { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope, worker } from '@kit
import {
OfflineRecognizer,
OfflineRecognizerConfig,
OfflineStream,
OnlineRecognizerResult,
readWaveFromBinary,
SileroVadConfig,
SpeechSegment,
Vad,
VadConfig,
} from 'sherpa_onnx';
... ... @@ -18,7 +21,7 @@ let vad: Vad; // vad for decoding files
function initVad(context: Context): Vad {
let mgr = context.resourceManager;
const config = new VadConfig(
const config: VadConfig = new VadConfig(
new SileroVadConfig(
'silero_vad.onnx',
0.5,
... ... @@ -37,7 +40,7 @@ function initVad(context: Context): Vad {
function initNonStreamingAsr(context: Context): OfflineRecognizer {
let mgr = context.resourceManager;
const config = new OfflineRecognizerConfig();
const config: OfflineRecognizerConfig = new OfflineRecognizerConfig();
// Note that you can switch to a new model by changing type
//
... ... @@ -61,7 +64,13 @@ function initNonStreamingAsr(context: Context): OfflineRecognizer {
const type = 2;
config.modelConfig = getOfflineModelConfig(type);
config.modelConfig.debug = true;
return new OfflineRecognizer(config, mgr)
config.ruleFsts = '';
return new OfflineRecognizer(config, mgr);
}
interface Wave {
samples: Float32Array;
sampleRate: number;
}
function decode(filename: string): string {
... ... @@ -71,44 +80,44 @@ function decode(filename: string): string {
const stat = fileIo.statSync(fp.fd);
const arrayBuffer = new ArrayBuffer(stat.size);
fileIo.readSync(fp.fd, arrayBuffer);
const data = new Uint8Array(arrayBuffer);
const data: Uint8Array = new Uint8Array(arrayBuffer);
const wave = readWaveFromBinary(data);
const wave: Wave = readWaveFromBinary(data);
console.log(`sample rate ${wave.sampleRate}`);
console.log(`samples length ${wave.samples.length}`);
const resultList: string[] = [];
const windowSize = vad.config.sileroVad.windowSize;
const windowSize: number = vad.config.sileroVad.windowSize;
for (let i = 0; i < wave.samples.length; i += windowSize) {
const thisWindow = wave.samples.subarray(i, i + windowSize)
const thisWindow: Float32Array = wave.samples.subarray(i, i + windowSize)
vad.acceptWaveform(thisWindow);
if (i + windowSize >= wave.samples.length) {
vad.flush();
}
while (!vad.isEmpty()) {
const segment = vad.front();
const _startTime = (segment.start / wave.sampleRate);
const _endTime = _startTime + segment.samples.length / wave.sampleRate;
const segment: SpeechSegment = vad.front();
const _startTime: number = (segment.start / wave.sampleRate);
const _endTime: number = _startTime + segment.samples.length / wave.sampleRate;
if (_endTime - _startTime < 0.2) {
vad.pop();
continue;
}
const startTime = _startTime.toFixed(2);
const endTime = _endTime.toFixed(2);
const startTime: string = _startTime.toFixed(2);
const endTime: string = _endTime.toFixed(2);
const progress = (segment.start + segment.samples.length) / wave.samples.length * 100;
const progress: number = (segment.start + segment.samples.length) / wave.samples.length * 100;
workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-decode-progress', progress });
const stream = recognizer.createStream();
const stream: OfflineStream = recognizer.createStream();
stream.acceptWaveform({ samples: segment.samples, sampleRate: wave.sampleRate });
recognizer.decode(stream);
const result = recognizer.getResult(stream);
const result: OnlineRecognizerResult = recognizer.getResult(stream);
const text = `${startTime} -- ${endTime} ${result.text}`
const text: string = `${startTime} -- ${endTime} ${result.text}`
resultList.push(text);
console.log(`partial result ${text}`);
... ...
... ... @@ -2,11 +2,6 @@
"modelVersion": "5.0.0",
"description": "Please describe the basic information.",
"dependencies": {
// You can download sherpa_onnx-v1.10.32.har
// from
// https://huggingface.co/csukuangfj/sherpa-onnx-harmony-os/tree/main/har
"sherpa_onnx": "file:./entry/sherpa_onnx-v1.10.32.har"
},
"devDependencies": {
"@ohos/hypium": "1.0.19"
... ...
... ... @@ -2,6 +2,7 @@
import argparse
from dataclasses import dataclass
from pathlib import Path
import jinja2
... ... @@ -34,6 +35,7 @@ class Model:
# e.g., zh, en, zh_en
lang: str
lang2: str
# e.g., whisper, paraformer, zipformer
short_name: str = ""
... ... @@ -51,6 +53,7 @@ def get_models():
model_name="sherpa-onnx-whisper-tiny.en",
idx=2,
lang="en",
lang2="English",
short_name="whisper_tiny",
cmd="""
pushd $model_name
... ... @@ -71,6 +74,7 @@ def get_models():
model_name="sherpa-onnx-paraformer-zh-2023-09-14",
idx=0,
lang="zh_en",
lang2="Chinese,English",
short_name="paraformer",
rule_fsts="itn_zh_number.fst",
cmd="""
... ... @@ -92,6 +96,7 @@ def get_models():
model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
idx=15,
lang="zh_en_ko_ja_yue",
lang2="中英粤日韩",
short_name="sense_voice",
cmd="""
pushd $model_name
... ... @@ -109,6 +114,7 @@ def get_models():
model_name="sherpa-onnx-paraformer-zh-small-2024-03-09",
idx=14,
lang="zh_en",
lang2="Chinese,English",
short_name="small_paraformer",
rule_fsts="itn_zh_number.fst",
cmd="""
... ... @@ -132,6 +138,7 @@ def get_models():
model_name="icefall-asr-zipformer-wenetspeech-20230615",
idx=4,
lang="zh",
lang2="Chinese",
short_name="zipformer",
rule_fsts="itn_zh_number.fst",
cmd="""
... ... @@ -159,6 +166,7 @@ def get_models():
model_name="sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k",
idx=7,
lang="be_de_en_es_fr_hr_it_pl_ru_uk",
lang2="be_de_en_es_fr_hr_it_pl_ru_uk",
short_name="fast_conformer_ctc_20k",
cmd="""
pushd $model_name
... ... @@ -174,6 +182,7 @@ def get_models():
model_name="sherpa-onnx-nemo-fast-conformer-ctc-en-24500",
idx=8,
lang="en",
lang2="English",
short_name="fast_conformer_ctc_24500",
cmd="""
pushd $model_name
... ... @@ -188,7 +197,8 @@ def get_models():
Model(
model_name="sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288",
idx=9,
lang="en_des_es_fr",
lang="en_de_es_fr",
lang2="English,German,Spanish,French",
short_name="fast_conformer_ctc_14288",
cmd="""
pushd $model_name
... ... @@ -204,6 +214,7 @@ def get_models():
model_name="sherpa-onnx-nemo-fast-conformer-ctc-es-1424",
idx=10,
lang="es",
lang2="Spanish",
short_name="fast_conformer_ctc_1424",
cmd="""
pushd $model_name
... ... @@ -219,6 +230,7 @@ def get_models():
model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04",
idx=11,
lang="zh",
lang2="Chinese",
short_name="telespeech",
rule_fsts="itn_zh_number.fst",
cmd="""
... ... @@ -239,6 +251,7 @@ def get_models():
model_name="sherpa-onnx-zipformer-thai-2024-06-20",
idx=12,
lang="th",
lang2="Thai",
short_name="zipformer",
cmd="""
pushd $model_name
... ... @@ -260,6 +273,7 @@ def get_models():
model_name="sherpa-onnx-zipformer-korean-2024-06-24",
idx=13,
lang="ko",
lang2="Korean",
short_name="zipformer",
cmd="""
pushd $model_name
... ... @@ -281,6 +295,7 @@ def get_models():
model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01",
idx=16,
lang="ja",
lang2="Japanese",
short_name="zipformer_reazonspeech",
cmd="""
pushd $model_name
... ... @@ -300,6 +315,7 @@ def get_models():
model_name="sherpa-onnx-zipformer-ru-2024-09-18",
idx=17,
lang="ru",
lang2="Russian",
short_name="zipformer",
cmd="""
pushd $model_name
... ... @@ -320,6 +336,7 @@ def get_models():
model_name="sherpa-onnx-small-zipformer-ru-2024-09-18",
idx=18,
lang="ru",
lang2="Russian",
short_name="small_zipformer",
cmd="""
pushd $model_name
... ... @@ -340,6 +357,7 @@ def get_models():
model_name="sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24",
idx=19,
lang="ru",
lang2="Russian",
short_name="nemo_ctc_giga_am",
cmd="""
pushd $model_name
... ... @@ -358,6 +376,7 @@ def get_models():
model_name="sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24",
idx=20,
lang="ru",
lang2="Russian",
short_name="nemo_transducer_giga_am",
cmd="""
pushd $model_name
... ... @@ -376,6 +395,7 @@ def get_models():
model_name="sherpa-onnx-moonshine-tiny-en-int8",
idx=21,
lang="en",
lang2="English",
short_name="moonshine_tiny_int8",
cmd="""
pushd $model_name
... ... @@ -391,6 +411,7 @@ def get_models():
model_name="sherpa-onnx-moonshine-base-en-int8",
idx=22,
lang="en",
lang2="English",
short_name="moonshine_base_int8",
cmd="""
pushd $model_name
... ... @@ -436,9 +457,14 @@ def main():
filename_list = [
"./build-apk-vad-asr.sh",
"./build-hap-vad-asr.sh",
]
for filename in filename_list:
environment = jinja2.Environment()
if not Path(f"{filename}.in").is_file():
print(f"skip {filename}")
continue
with open(f"{filename}.in") as f:
s = f.read()
template = environment.from_string(s)
... ...
!build-*.in
... ...
#!/usr/bin/env bash
#
# Auto generated! Please DO NOT EDIT!
#
# Jinja2 template: the section between "for model in model_list" and "endfor"
# is emitted once per model. For every model it downloads the model files,
# patches the HarmonyOS SherpaOnnxVadAsr project to use that model, builds a
# release HAP, signs it, and moves the signed HAP into ./haps.
#
# Run the generated script from the project root directory (it reads
# ./CMakeLists.txt and uses paths relative to it).
# Please set the environment variable COMMANDLINE_TOOLS_DIR
# before running this script
# Inside the $COMMANDLINE_TOOLS_DIR directory, you can find the following:
#
# command-line-tools fangjun$ ls
# LICENSE.txt NOTICE.txt bin codelinter hstack hvigor ohpm sdk tool
#
# Signing additionally expects these files and environment variables:
#   /tmp/sherpa_onnx.cer, /tmp/sherpa_onnx_profileRelease.p7b,
#   /tmp/sherpa_onnx_ohos_key.p12,
#   HAP_KEY_ALIAS, HAP_KEY_PWD, HAP_KEY_STORE_PWD
set -ex
log() {
# This function is from espnet
# Prints a timestamp plus the caller's file name, line number and function
# name in front of the message.
local fname=${BASH_SOURCE[1]##*/}
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
# Take the version from the SHERPA_ONNX_VERSION line of ./CMakeLists.txt:
# second space-separated field, then the text inside the double quotes.
SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)
log "Building streaming VAD + ASR Hap for sherpa-onnx v${SHERPA_ONNX_VERSION}"
export SHERPA_ONNX_ENABLE_TTS=OFF
# Stop early when hvigorw cannot be found in the command line tools.
if [ ! -f $COMMANDLINE_TOOLS_DIR/bin/hvigorw ]; then
echo "Please first download Command Line Tools for HarmonyOS"
echo "See https://developer.huawei.com/consumer/cn/download/"
echo "or"
echo "https://hf-mirror.com/csukuangfj/harmonyos-commandline-tools/tree/main"
exit 1
fi
# Signing tool invoked with `java -jar` after each build.
jar=$COMMANDLINE_TOOLS_DIR/sdk/default/openharmony/toolchains/lib/hap-sign-tool.jar
# Make the tools in $COMMANDLINE_TOOLS_DIR/bin (hvigorw is checked above)
# available on PATH.
export PATH=$COMMANDLINE_TOOLS_DIR/bin:$PATH
# Output directory for the signed HAPs.
mkdir -p haps
{% for model in model_list %}
# Download and unpack the model inside the app's rawfile resource directory.
pushd ./harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/rawfile
model_name={{ model.model_name }}
type={{ model.idx }}
lang={{ model.lang }}
lang2={{ model.lang2 }}
short_name={{ model.short_name }}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
tar xvf ${model_name}.tar.bz2
# Model-specific shell commands supplied by the generator script.
{{ model.cmd }}
rm -rf *.tar.bz2
ls -lh $model_name
# The VAD model is downloaded only if it is not already present.
if [ ! -f ./silero_vad.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
fi
popd
# Now we are at the project root directory
# Restore the tracked files so that the sed edits below are applied to the
# committed sources.
git checkout .
pushd harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/
# Select this model by replacing the default `const type = 2` with its idx.
sed -i.bak s/"const type = 2/const type = $type/" ./NonStreamingAsrWithVadWorker.ets
{% if model.rule_fsts %}
# Emitted only for models that define rule_fsts: fill in the empty
# ruleFsts assignment with the rule FST file name.
rule_fsts={{ model.rule_fsts }}
sed -i.bak s%"ruleFsts = ''"%"ruleFsts = \"$rule_fsts\""% ./NonStreamingAsrWithVadWorker.ets
{% endif %}
git diff
popd
pushd harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages
# Replace the first "English" on each line of Index.ets with $lang2.
sed -i.bak s/English/$lang2/ ./Index.ets
popd
pushd harmony-os/SherpaOnnxVadAsr
git diff
cd entry
ohpm install
cd ..
# Build the release HAP.
hvigorw clean --no-daemon
hvigorw assembleHap --mode module -p product=default -p buildMode=release --no-daemon
ls -lh ./entry/build/default/outputs/default/entry-default-unsigned.hap
in_file=./entry/build/default/outputs/default/entry-default-unsigned.hap
# Absolute path, because $out_file is used again after the popd below.
out_file=$PWD/entry/build/default/outputs/default/entry-default-signed.hap
# Sign the HAP with the certificate, profile and keystore stored under /tmp;
# the key alias and the passwords come from the environment.
java -jar $jar sign-app -keyAlias "$HAP_KEY_ALIAS" -signAlg "SHA256withECDSA" -mode "localSign" \
-appCertFile "/tmp/sherpa_onnx.cer" -profileFile "/tmp/sherpa_onnx_profileRelease.p7b" \
-inFile $in_file -keystoreFile "/tmp/sherpa_onnx_ohos_key.p12" \
-outFile $out_file -keyPwd "$HAP_KEY_PWD" -keystorePwd "$HAP_KEY_STORE_PWD" -signCode "1"
ls -l $in_file $out_file
ls -lh $in_file $out_file
# Remove the unsigned HAP and the unpacked model of this iteration.
rm $in_file
rm -rf ./entry/src/main/resources/rawfile/$model_name
popd
# The output name encodes the sherpa-onnx version, the language tag and the
# model's short name.
mv $out_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap
ls -lh haps
{% endfor %}
# Undo the modifications made to tracked files by the last iteration.
git checkout .
ls -lh haps/
... ...
../apk/generate-vad-asr-apk-script.py
\ No newline at end of file
... ...