Committed by
GitHub
Support WebAssembly for text-to-speech (#577)
正在显示
20 个修改的文件
包含
888 行增加
和
69 行删除
.github/workflows/wasm-simd-hf-space-en.yaml
0 → 100644
| 1 | +name: wasm-simd-hf-space-en | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - wasm-2 | ||
| 7 | + release: | ||
| 8 | + types: | ||
| 9 | + - published | ||
| 10 | + | ||
| 11 | + workflow_dispatch: | ||
| 12 | + | ||
| 13 | +concurrency: | ||
| 14 | + group: wasm-simd-hf-space-en-${{ github.ref }} | ||
| 15 | + cancel-in-progress: true | ||
| 16 | + | ||
| 17 | +jobs: | ||
| 18 | + wasm-simd-hf-space-en: | ||
| 19 | + runs-on: ${{ matrix.os }} | ||
| 20 | + strategy: | ||
| 21 | + fail-fast: false | ||
| 22 | + matrix: | ||
| 23 | + os: [ubuntu-latest] | ||
| 24 | + | ||
| 25 | + steps: | ||
| 26 | + - uses: actions/checkout@v4 | ||
| 27 | + with: | ||
| 28 | + fetch-depth: 0 | ||
| 29 | + - name: Install emsdk | ||
| 30 | + uses: mymindstorm/setup-emsdk@v14 | ||
| 31 | + | ||
| 32 | + - name: View emsdk version | ||
| 33 | + shell: bash | ||
| 34 | + run: | | ||
| 35 | + emcc -v | ||
| 36 | + echo "--------------------" | ||
| 37 | + emcc --check | ||
| 38 | + | ||
| 39 | + - name: Download model files | ||
| 40 | + shell: bash | ||
| 41 | + run: | | ||
| 42 | + cd wasm/assets | ||
| 43 | + ls -lh | ||
| 44 | + echo "----------" | ||
| 45 | + wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 46 | + tar xf vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 47 | + rm vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 48 | + mv vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx ./model.onnx | ||
| 49 | + mv vits-piper-en_US-libritts_r-medium/tokens.txt ./ | ||
| 50 | + mv vits-piper-en_US-libritts_r-medium/espeak-ng-data ./ | ||
| 51 | + rm -rf vits-piper-en_US-libritts_r-medium | ||
| 52 | + | ||
| 53 | + ls -lh | ||
| 54 | + | ||
| 55 | + - name: Build sherpa-onnx for WebAssembly | ||
| 56 | + shell: bash | ||
| 57 | + run: | | ||
| 58 | + ./build-wasm-simd.sh | ||
| 59 | + | ||
| 60 | + - name: collect files | ||
| 61 | + shell: bash | ||
| 62 | + run: | | ||
| 63 | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 64 | + | ||
| 65 | + mv build-wasm-simd/install/bin/wasm sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en | ||
| 66 | + ls -lh sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en | ||
| 67 | + tar cjfv sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en.tar.bz2 ./sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en | ||
| 68 | + | ||
| 69 | + - name: Upload wasm files | ||
| 70 | + uses: actions/upload-artifact@v4 | ||
| 71 | + with: | ||
| 72 | + name: sherpa-onnx-wasm-simd-en | ||
| 73 | + path: ./sherpa-onnx-wasm-simd-*.tar.bz2 | ||
| 74 | + | ||
| 75 | + - name: Publish to huggingface | ||
| 76 | + env: | ||
| 77 | + HF_TOKEN: ${{ secrets.HF_TOKEN }} | ||
| 78 | + uses: nick-fields/retry@v2 | ||
| 79 | + with: | ||
| 80 | + max_attempts: 20 | ||
| 81 | + timeout_seconds: 200 | ||
| 82 | + shell: bash | ||
| 83 | + command: | | ||
| 84 | + SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) | ||
| 85 | + | ||
| 86 | + git config --global user.email "csukuangfj@gmail.com" | ||
| 87 | + git config --global user.name "Fangjun Kuang" | ||
| 88 | + | ||
| 89 | + rm -rf huggingface | ||
| 90 | + export GIT_LFS_SKIP_SMUDGE=1 | ||
| 91 | + | ||
| 92 | + git clone https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en huggingface | ||
| 93 | + cd huggingface | ||
| 94 | + git fetch | ||
| 95 | + git pull | ||
| 96 | + git merge -m "merge remote" --ff origin main | ||
| 97 | + | ||
| 98 | + cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en/* . | ||
| 99 | + | ||
| 100 | + git status | ||
| 101 | + git lfs track "*.data" | ||
| 102 | + git lfs track "*.wasm" | ||
| 103 | + ls -lh | ||
| 104 | + | ||
| 105 | + git add . | ||
| 106 | + git commit -m "update model" | ||
| 107 | + git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en main |
| @@ -20,6 +20,8 @@ option(SHERPA_ONNX_ENABLE_JNI "Whether to build JNI internface" OFF) | @@ -20,6 +20,8 @@ option(SHERPA_ONNX_ENABLE_JNI "Whether to build JNI internface" OFF) | ||
| 20 | option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON) | 20 | option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON) |
| 21 | option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" ON) | 21 | option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" ON) |
| 22 | option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) | 22 | option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) |
| 23 | +option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF) | ||
| 24 | +option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON) | ||
| 23 | option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) | 25 | option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) |
| 24 | 26 | ||
| 25 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") | 27 | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") |
| @@ -99,6 +101,10 @@ message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}") | @@ -99,6 +101,10 @@ message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}") | ||
| 99 | message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}") | 101 | message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}") |
| 100 | message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}") | 102 | message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}") |
| 101 | message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") | 103 | message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") |
| 104 | +message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}") | ||
| 105 | +if(SHERPA_ONNX_ENABLE_WASM) | ||
| 106 | + add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1) | ||
| 107 | +endif() | ||
| 102 | 108 | ||
| 103 | if(NOT CMAKE_CXX_STANDARD) | 109 | if(NOT CMAKE_CXX_STANDARD) |
| 104 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") | 110 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") |
| @@ -109,7 +115,7 @@ message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}") | @@ -109,7 +115,7 @@ message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}") | ||
| 109 | 115 | ||
| 110 | include(CheckIncludeFileCXX) | 116 | include(CheckIncludeFileCXX) |
| 111 | 117 | ||
| 112 | -if(UNIX AND NOT APPLE) | 118 | +if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM) |
| 113 | check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA) | 119 | check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA) |
| 114 | if(SHERPA_ONNX_HAS_ALSA) | 120 | if(SHERPA_ONNX_HAS_ALSA) |
| 115 | add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1) | 121 | add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1) |
| @@ -160,6 +166,11 @@ endif() | @@ -160,6 +166,11 @@ endif() | ||
| 160 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) | 166 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) |
| 161 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) | 167 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) |
| 162 | 168 | ||
| 169 | +if(SHERPA_ONNX_ENABLE_WASM) | ||
| 170 | + # Uncomment the following line to enable extra debugging flags in case there is something wrong. | ||
| 171 | + # string(APPEND CMAKE_CXX_FLAGS " -g4 -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=1 ") | ||
| 172 | +endif() | ||
| 173 | + | ||
| 163 | if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux) | 174 | if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux) |
| 164 | if(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY) | 175 | if(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY) |
| 165 | message(STATUS "Link libstdc++ statically") | 176 | message(STATUS "Link libstdc++ statically") |
| @@ -200,9 +211,14 @@ include(piper-phonemize) | @@ -200,9 +211,14 @@ include(piper-phonemize) | ||
| 200 | 211 | ||
| 201 | add_subdirectory(sherpa-onnx) | 212 | add_subdirectory(sherpa-onnx) |
| 202 | 213 | ||
| 203 | -if(SHERPA_ONNX_ENABLE_C_API) | 214 | +if(SHERPA_ONNX_ENABLE_C_API AND SHERPA_ONNX_ENABLE_BINARY) |
| 204 | add_subdirectory(c-api-examples) | 215 | add_subdirectory(c-api-examples) |
| 205 | endif() | 216 | endif() |
| 217 | + | ||
| 218 | +if(SHERPA_ONNX_ENABLE_WASM) | ||
| 219 | + add_subdirectory(wasm) | ||
| 220 | +endif() | ||
| 221 | + | ||
| 206 | message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") | 222 | message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") |
| 207 | 223 | ||
| 208 | if(NOT BUILD_SHARED_LIBS) | 224 | if(NOT BUILD_SHARED_LIBS) |
build-wasm-simd.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Copyright (c) 2024 Xiaomi Corporation | ||
| 3 | +# | ||
| 4 | +# This script is to build sherpa-onnx for WebAssembly | ||
| 5 | + | ||
| 6 | +set -ex | ||
| 7 | + | ||
| 8 | +if [ x"$EMSCRIPTEN" == x"" ]; then | ||
| 9 | + if ! command -v emcc &> /dev/null; then | ||
| 10 | + echo "Please install emscripten first" | ||
| 11 | + echo "" | ||
| 12 | + echo "You can use the following commands to install it:" | ||
| 13 | + echo "" | ||
| 14 | + echo "git clone https://github.com/emscripten-core/emsdk.git" | ||
| 15 | + echo "cd emsdk" | ||
| 16 | + echo "git pull" | ||
| 17 | + echo "./emsdk install latest" | ||
| 18 | + echo "./emsdk activate latest" | ||
| 19 | + echo "source ./emsdk_env.sh" | ||
| 20 | + exit 1 | ||
| 21 | + else | ||
| 22 | + EMSCRIPTEN=$(dirname $(realpath $(which emcc))) | ||
| 23 | + fi | ||
| 24 | +fi | ||
| 25 | + | ||
| 26 | +export EMSCRIPTEN=$EMSCRIPTEN | ||
| 27 | +echo "EMSCRIPTEN: $EMSCRIPTEN" | ||
| 28 | +if [ ! -f $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake ]; then | ||
| 29 | + echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" | ||
| 30 | + echo "Please make sure you have installed emsdk correctly" | ||
| 31 | + exit 1 | ||
| 32 | +fi | ||
| 33 | + | ||
| 34 | +mkdir -p build-wasm-simd | ||
| 35 | +pushd build-wasm-simd | ||
| 36 | + | ||
| 37 | +export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON | ||
| 38 | + | ||
| 39 | +cmake \ | ||
| 40 | + -DCMAKE_INSTALL_PREFIX=./install \ | ||
| 41 | + -DCMAKE_BUILD_TYPE=Release \ | ||
| 42 | + -DCMAKE_TOOLCHAIN_FILE=$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake \ | ||
| 43 | + \ | ||
| 44 | + -DSHERPA_ONNX_ENABLE_PYTHON=OFF \ | ||
| 45 | + -DSHERPA_ONNX_ENABLE_TESTS=OFF \ | ||
| 46 | + -DSHERPA_ONNX_ENABLE_CHECK=OFF \ | ||
| 47 | + -DBUILD_SHARED_LIBS=OFF \ | ||
| 48 | + -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \ | ||
| 49 | + -DSHERPA_ONNX_ENABLE_JNI=OFF \ | ||
| 50 | + -DSHERPA_ONNX_ENABLE_C_API=ON \ | ||
| 51 | + -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \ | ||
| 52 | + -DSHERPA_ONNX_ENABLE_GPU=OFF \ | ||
| 53 | + -DSHERPA_ONNX_ENABLE_WASM=ON \ | ||
| 54 | + -DSHERPA_ONNX_ENABLE_BINARY=OFF \ | ||
| 55 | + -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \ | ||
| 56 | + .. | ||
| 57 | +make -j2 | ||
| 58 | +make install |
| 1 | function(download_espeak_ng_for_piper) | 1 | function(download_espeak_ng_for_piper) |
| 2 | include(FetchContent) | 2 | include(FetchContent) |
| 3 | 3 | ||
| 4 | - set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip") | ||
| 5 | - set(espeak_ng_URL2 "") | ||
| 6 | - set(espeak_ng_HASH "SHA256=8a48251e6926133dd91fcf6cb210c7c2e290a9b578d269446e2d32d710b0dfa0") | 4 | + set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/69bf6927964fb042aeb827cfdf6082a30f5802eb.zip") |
| 5 | + set(espeak_ng_URL2 "https://hub.nuaa.cf/csukuangfj/espeak-ng/archive/69bf6927964fb042aeb827cfdf6082a30f5802eb.zip") | ||
| 6 | + set(espeak_ng_HASH "SHA256=745e35b21ece6804b4a1839722f9e625ac909380c8f85873ad71bf145877075a") | ||
| 7 | 7 | ||
| 8 | set(BUILD_ESPEAK_NG_TESTS OFF CACHE BOOL "" FORCE) | 8 | set(BUILD_ESPEAK_NG_TESTS OFF CACHE BOOL "" FORCE) |
| 9 | set(USE_ASYNC OFF CACHE BOOL "" FORCE) | 9 | set(USE_ASYNC OFF CACHE BOOL "" FORCE) |
| @@ -15,14 +15,18 @@ function(download_espeak_ng_for_piper) | @@ -15,14 +15,18 @@ function(download_espeak_ng_for_piper) | ||
| 15 | set(EXTRA_cmn ON CACHE BOOL "" FORCE) | 15 | set(EXTRA_cmn ON CACHE BOOL "" FORCE) |
| 16 | set(EXTRA_ru ON CACHE BOOL "" FORCE) | 16 | set(EXTRA_ru ON CACHE BOOL "" FORCE) |
| 17 | 17 | ||
| 18 | + if(SHERPA_ONNX_ENABLE_WASM) | ||
| 19 | + set(BUILD_ESPEAK_NG_EXE OFF CACHE BOOL "" FORCE) | ||
| 20 | + endif() | ||
| 21 | + | ||
| 18 | # If you don't have access to the Internet, | 22 | # If you don't have access to the Internet, |
| 19 | # please pre-download espeak-ng | 23 | # please pre-download espeak-ng |
| 20 | set(possible_file_locations | 24 | set(possible_file_locations |
| 21 | - $ENV{HOME}/Downloads/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip | ||
| 22 | - ${CMAKE_SOURCE_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip | ||
| 23 | - ${CMAKE_BINARY_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip | ||
| 24 | - /tmp/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip | ||
| 25 | - /star-fj/fangjun/download/github/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip | 25 | + $ENV{HOME}/Downloads/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip |
| 26 | + ${CMAKE_SOURCE_DIR}/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip | ||
| 27 | + ${CMAKE_BINARY_DIR}/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip | ||
| 28 | + /tmp/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip | ||
| 29 | + /star-fj/fangjun/download/github/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip | ||
| 26 | ) | 30 | ) |
| 27 | 31 | ||
| 28 | foreach(f IN LISTS possible_file_locations) | 32 | foreach(f IN LISTS possible_file_locations) |
| 1 | function(download_kaldi_decoder) | 1 | function(download_kaldi_decoder) |
| 2 | include(FetchContent) | 2 | include(FetchContent) |
| 3 | 3 | ||
| 4 | - set(kaldi_decoder_URL "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.3.tar.gz") | ||
| 5 | - set(kaldi_decoder_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-decoder-0.2.3.tar.gz") | ||
| 6 | - set(kaldi_decoder_HASH "SHA256=98bf445a5b7961ccf3c3522317d900054eaadb6a9cdcf4531e7d9caece94a56d") | 4 | + set(kaldi_decoder_URL "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.4.tar.gz") |
| 5 | + set(kaldi_decoder_URL2 "https://hub.nuaa.cf/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.4.tar.gz") | ||
| 6 | + set(kaldi_decoder_HASH "SHA256=136d96c2f1f8ec44de095205f81a6ce98981cd867fe4ba840f9415a0b58fe601") | ||
| 7 | 7 | ||
| 8 | set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE) | 8 | set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE) |
| 9 | set(KALDI_DECODER_ENABLE_TESTS OFF CACHE BOOL "" FORCE) | 9 | set(KALDI_DECODER_ENABLE_TESTS OFF CACHE BOOL "" FORCE) |
| @@ -12,11 +12,11 @@ function(download_kaldi_decoder) | @@ -12,11 +12,11 @@ function(download_kaldi_decoder) | ||
| 12 | # If you don't have access to the Internet, | 12 | # If you don't have access to the Internet, |
| 13 | # please pre-download kaldi-decoder | 13 | # please pre-download kaldi-decoder |
| 14 | set(possible_file_locations | 14 | set(possible_file_locations |
| 15 | - $ENV{HOME}/Downloads/kaldi-decoder-0.2.3.tar.gz | ||
| 16 | - ${CMAKE_SOURCE_DIR}/kaldi-decoder-0.2.3.tar.gz | ||
| 17 | - ${CMAKE_BINARY_DIR}/kaldi-decoder-0.2.3.tar.gz | ||
| 18 | - /tmp/kaldi-decoder-0.2.3.tar.gz | ||
| 19 | - /star-fj/fangjun/download/github/kaldi-decoder-0.2.3.tar.gz | 15 | + $ENV{HOME}/Downloads/kaldi-decoder-0.2.4.tar.gz |
| 16 | + ${CMAKE_SOURCE_DIR}/kaldi-decoder-0.2.4.tar.gz | ||
| 17 | + ${CMAKE_BINARY_DIR}/kaldi-decoder-0.2.4.tar.gz | ||
| 18 | + /tmp/kaldi-decoder-0.2.4.tar.gz | ||
| 19 | + /star-fj/fangjun/download/github/kaldi-decoder-0.2.4.tar.gz | ||
| 20 | ) | 20 | ) |
| 21 | 21 | ||
| 22 | foreach(f IN LISTS possible_file_locations) | 22 | foreach(f IN LISTS possible_file_locations) |
| 1 | function(download_kaldifst) | 1 | function(download_kaldifst) |
| 2 | include(FetchContent) | 2 | include(FetchContent) |
| 3 | 3 | ||
| 4 | - set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.9.tar.gz") | ||
| 5 | - set(kaldifst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/kaldifst-1.7.9.tar.gz") | ||
| 6 | - set(kaldifst_HASH "SHA256=8c653021491dca54c38ab659565edfab391418a79ae87099257863cd5664dd39") | 4 | + set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.10.tar.gz") |
| 5 | + set(kaldifst_URL2 "https://hub.nuaa.cf/k2-fsa/kaldifst/archive/refs/tags/v1.7.10.tar.gz") | ||
| 6 | + set(kaldifst_HASH "SHA256=7f7b3173a6584a6b1987f65ae7af2ac453d66b845f875a9d31074b8d2cd0de54") | ||
| 7 | 7 | ||
| 8 | # If you don't have access to the Internet, | 8 | # If you don't have access to the Internet, |
| 9 | # please pre-download kaldifst | 9 | # please pre-download kaldifst |
| 10 | set(possible_file_locations | 10 | set(possible_file_locations |
| 11 | - $ENV{HOME}/Downloads/kaldifst-1.7.9.tar.gz | ||
| 12 | - ${CMAKE_SOURCE_DIR}/kaldifst-1.7.9.tar.gz | ||
| 13 | - ${CMAKE_BINARY_DIR}/kaldifst-1.7.9.tar.gz | ||
| 14 | - /tmp/kaldifst-1.7.9.tar.gz | ||
| 15 | - /star-fj/fangjun/download/github/kaldifst-1.7.9.tar.gz | 11 | + $ENV{HOME}/Downloads/kaldifst-1.7.10.tar.gz |
| 12 | + ${CMAKE_SOURCE_DIR}/kaldifst-1.7.10.tar.gz | ||
| 13 | + ${CMAKE_BINARY_DIR}/kaldifst-1.7.10.tar.gz | ||
| 14 | + /tmp/kaldifst-1.7.10.tar.gz | ||
| 15 | + /star-fj/fangjun/download/github/kaldifst-1.7.10.tar.gz | ||
| 16 | ) | 16 | ) |
| 17 | 17 | ||
| 18 | foreach(f IN LISTS possible_file_locations) | 18 | foreach(f IN LISTS possible_file_locations) |
cmake/onnxruntime-wasm-simd.cmake
0 → 100644
| 1 | +# Copyright (c) 2022-2024 Xiaomi Corporation | ||
| 2 | +message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") | ||
| 3 | +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") | ||
| 4 | + | ||
| 5 | +if(NOT SHERPA_ONNX_ENABLE_WASM) | ||
| 6 | + message(FATAL_ERROR "This file is for WebAssembly.") | ||
| 7 | +endif() | ||
| 8 | + | ||
| 9 | +if(BUILD_SHARED_LIBS) | ||
| 10 | + message(FATAL_ERROR "BUILD_SHARED_LIBS should be OFF for WebAssembly") | ||
| 11 | +endif() | ||
| 12 | + | ||
| 13 | +set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.0/onnxruntime-wasm-static_lib-simd-1.17.0.zip") | ||
| 14 | +set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.0/onnxruntime-wasm-static_lib-simd-1.17.0.zip") | ||
| 15 | +set(onnxruntime_HASH "SHA256=0ee6120d2ade093eff731af792fd137ac2db580eb2dc5b8bf39e0897b0d7afd9") | ||
| 16 | + | ||
| 17 | +# If you don't have access to the Internet, | ||
| 18 | +# please download onnxruntime to one of the following locations. | ||
| 19 | +# You can add more if you want. | ||
| 20 | +set(possible_file_locations | ||
| 21 | + $ENV{HOME}/Downloads/onnxruntime-wasm-static_lib-simd-1.17.0.zip | ||
| 22 | + ${CMAKE_SOURCE_DIR}/onnxruntime-wasm-static_lib-simd-1.17.0.zip | ||
| 23 | + ${CMAKE_BINARY_DIR}/onnxruntime-wasm-static_lib-simd-1.17.0.zip | ||
| 24 | + /tmp/onnxruntime-wasm-static_lib-simd-1.17.0.zip | ||
| 25 | + /star-fj/fangjun/download/github/onnxruntime-wasm-static_lib-simd-1.17.0.zip | ||
| 26 | +) | ||
| 27 | + | ||
| 28 | +foreach(f IN LISTS possible_file_locations) | ||
| 29 | + if(EXISTS ${f}) | ||
| 30 | + set(onnxruntime_URL "${f}") | ||
| 31 | + file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL) | ||
| 32 | + message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}") | ||
| 33 | + set(onnxruntime_URL2) | ||
| 34 | + break() | ||
| 35 | + endif() | ||
| 36 | +endforeach() | ||
| 37 | + | ||
| 38 | +FetchContent_Declare(onnxruntime | ||
| 39 | + URL | ||
| 40 | + ${onnxruntime_URL} | ||
| 41 | + ${onnxruntime_URL2} | ||
| 42 | + URL_HASH ${onnxruntime_HASH} | ||
| 43 | +) | ||
| 44 | + | ||
| 45 | +FetchContent_GetProperties(onnxruntime) | ||
| 46 | +if(NOT onnxruntime_POPULATED) | ||
| 47 | + message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}") | ||
| 48 | + FetchContent_Populate(onnxruntime) | ||
| 49 | +endif() | ||
| 50 | +message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}") | ||
| 51 | + | ||
| 52 | +# for static libraries, we use onnxruntime_lib_files directly below | ||
| 53 | +include_directories(${onnxruntime_SOURCE_DIR}/include) | ||
| 54 | + | ||
| 55 | +file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a") | ||
| 56 | + | ||
| 57 | +set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE) | ||
| 58 | + | ||
| 59 | +message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}") | ||
| 60 | +install(FILES ${onnxruntime_lib_files} DESTINATION lib) |
| @@ -4,8 +4,9 @@ function(download_onnxruntime) | @@ -4,8 +4,9 @@ function(download_onnxruntime) | ||
| 4 | 4 | ||
| 5 | message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") | 5 | message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") |
| 6 | message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") | 6 | message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") |
| 7 | - | ||
| 8 | - if(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) | 7 | + if(SHERPA_ONNX_ENABLE_WASM) |
| 8 | + include(onnxruntime-wasm-simd) | ||
| 9 | + elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) | ||
| 9 | if(BUILD_SHARED_LIBS) | 10 | if(BUILD_SHARED_LIBS) |
| 10 | include(onnxruntime-linux-aarch64) | 11 | include(onnxruntime-linux-aarch64) |
| 11 | else() | 12 | else() |
| @@ -11,6 +11,7 @@ | @@ -11,6 +11,7 @@ | ||
| 11 | 11 | ||
| 12 | #include "sherpa-onnx/csrc/circular-buffer.h" | 12 | #include "sherpa-onnx/csrc/circular-buffer.h" |
| 13 | #include "sherpa-onnx/csrc/display.h" | 13 | #include "sherpa-onnx/csrc/display.h" |
| 14 | +#include "sherpa-onnx/csrc/macros.h" | ||
| 14 | #include "sherpa-onnx/csrc/offline-recognizer.h" | 15 | #include "sherpa-onnx/csrc/offline-recognizer.h" |
| 15 | #include "sherpa-onnx/csrc/offline-tts.h" | 16 | #include "sherpa-onnx/csrc/offline-tts.h" |
| 16 | #include "sherpa-onnx/csrc/online-recognizer.h" | 17 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| @@ -90,7 +91,7 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | @@ -90,7 +91,7 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | ||
| 90 | SHERPA_ONNX_OR(config->hotwords_score, 1.5); | 91 | SHERPA_ONNX_OR(config->hotwords_score, 1.5); |
| 91 | 92 | ||
| 92 | if (config->model_config.debug) { | 93 | if (config->model_config.debug) { |
| 93 | - fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); | 94 | + SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); |
| 94 | } | 95 | } |
| 95 | 96 | ||
| 96 | SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; | 97 | SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; |
| @@ -320,7 +321,7 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | @@ -320,7 +321,7 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | ||
| 320 | SHERPA_ONNX_OR(config->hotwords_score, 1.5); | 321 | SHERPA_ONNX_OR(config->hotwords_score, 1.5); |
| 321 | 322 | ||
| 322 | if (config->model_config.debug) { | 323 | if (config->model_config.debug) { |
| 323 | - fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); | 324 | + SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str()); |
| 324 | } | 325 | } |
| 325 | 326 | ||
| 326 | SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; | 327 | SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; |
| @@ -476,7 +477,7 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( | @@ -476,7 +477,7 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( | ||
| 476 | vad_config.debug = SHERPA_ONNX_OR(config->debug, false); | 477 | vad_config.debug = SHERPA_ONNX_OR(config->debug, false); |
| 477 | 478 | ||
| 478 | if (vad_config.debug) { | 479 | if (vad_config.debug) { |
| 479 | - fprintf(stderr, "%s\n", vad_config.ToString().c_str()); | 480 | + SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str()); |
| 480 | } | 481 | } |
| 481 | 482 | ||
| 482 | SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector; | 483 | SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector; |
| @@ -566,7 +567,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | @@ -566,7 +567,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( | ||
| 566 | tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); | 567 | tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); |
| 567 | 568 | ||
| 568 | if (tts_config.model.debug) { | 569 | if (tts_config.model.debug) { |
| 569 | - fprintf(stderr, "%s\n", tts_config.ToString().c_str()); | 570 | + SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str()); |
| 570 | } | 571 | } |
| 571 | 572 | ||
| 572 | SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; | 573 | SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; |
| @@ -582,6 +583,10 @@ int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) { | @@ -582,6 +583,10 @@ int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) { | ||
| 582 | return tts->impl->SampleRate(); | 583 | return tts->impl->SampleRate(); |
| 583 | } | 584 | } |
| 584 | 585 | ||
| 586 | +int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) { | ||
| 587 | + return tts->impl->NumSpeakers(); | ||
| 588 | +} | ||
| 589 | + | ||
| 585 | const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate( | 590 | const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate( |
| 586 | const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, | 591 | const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, |
| 587 | float speed) { | 592 | float speed) { |
| @@ -658,6 +658,10 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts); | @@ -658,6 +658,10 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts); | ||
| 658 | SHERPA_ONNX_API int32_t | 658 | SHERPA_ONNX_API int32_t |
| 659 | SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts); | 659 | SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts); |
| 660 | 660 | ||
| 661 | +// Return the number of speakers of the current TTS object | ||
| 662 | +SHERPA_ONNX_API int32_t | ||
| 663 | +SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts); | ||
| 664 | + | ||
| 661 | // Generate audio from the given text and speaker id (sid). | 665 | // Generate audio from the given text and speaker id (sid). |
| 662 | // The user has to use DestroyOfflineTtsGeneratedAudio() to free the | 666 | // The user has to use DestroyOfflineTtsGeneratedAudio() to free the |
| 663 | // returned pointer to avoid memory leak. | 667 | // returned pointer to avoid memory leak. |
| @@ -128,9 +128,6 @@ if(APPLE) | @@ -128,9 +128,6 @@ if(APPLE) | ||
| 128 | ) | 128 | ) |
| 129 | endif() | 129 | endif() |
| 130 | 130 | ||
| 131 | -if(NOT WIN32) | ||
| 132 | - target_link_libraries(sherpa-onnx-core -pthread) | ||
| 133 | -endif() | ||
| 134 | 131 | ||
| 135 | if(ANDROID_NDK) | 132 | if(ANDROID_NDK) |
| 136 | target_link_libraries(sherpa-onnx-core android log) | 133 | target_link_libraries(sherpa-onnx-core android log) |
| @@ -172,36 +169,42 @@ if(SHERPA_ONNX_ENABLE_CHECK) | @@ -172,36 +169,42 @@ if(SHERPA_ONNX_ENABLE_CHECK) | ||
| 172 | endif() | 169 | endif() |
| 173 | 170 | ||
| 174 | if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux) | 171 | if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux) |
| 175 | - target_link_libraries(sherpa-onnx-core -pthread -ldl) | 172 | + target_link_libraries(sherpa-onnx-core -ldl) |
| 176 | endif() | 173 | endif() |
| 177 | 174 | ||
| 178 | -add_executable(sherpa-onnx sherpa-onnx.cc) | ||
| 179 | -add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc) | ||
| 180 | -add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc) | ||
| 181 | -add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc) | ||
| 182 | -add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc) | ||
| 183 | - | ||
| 184 | -set(main_exes | ||
| 185 | - sherpa-onnx | ||
| 186 | - sherpa-onnx-keyword-spotter | ||
| 187 | - sherpa-onnx-offline | ||
| 188 | - sherpa-onnx-offline-parallel | ||
| 189 | - sherpa-onnx-offline-tts | ||
| 190 | -) | 175 | +if(NOT WIN32 AND NOT SHERPA_ONNX_ENABLE_WASM AND CMAKE_SYSTEM_NAME STREQUAL Linux) |
| 176 | + target_link_libraries(sherpa-onnx-core -pthread) | ||
| 177 | +endif() | ||
| 191 | 178 | ||
| 192 | -foreach(exe IN LISTS main_exes) | ||
| 193 | - target_link_libraries(${exe} sherpa-onnx-core) | ||
| 194 | -endforeach() | 179 | +if(SHERPA_ONNX_ENABLE_BINARY) |
| 180 | + add_executable(sherpa-onnx sherpa-onnx.cc) | ||
| 181 | + add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc) | ||
| 182 | + add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc) | ||
| 183 | + add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc) | ||
| 184 | + add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc) | ||
| 185 | + | ||
| 186 | + set(main_exes | ||
| 187 | + sherpa-onnx | ||
| 188 | + sherpa-onnx-keyword-spotter | ||
| 189 | + sherpa-onnx-offline | ||
| 190 | + sherpa-onnx-offline-parallel | ||
| 191 | + sherpa-onnx-offline-tts | ||
| 192 | + ) | ||
| 195 | 193 | ||
| 196 | -if(NOT WIN32) | ||
| 197 | foreach(exe IN LISTS main_exes) | 194 | foreach(exe IN LISTS main_exes) |
| 198 | - target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib") | ||
| 199 | - target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib") | ||
| 200 | - | ||
| 201 | - if(SHERPA_ONNX_ENABLE_PYTHON) | ||
| 202 | - target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib") | ||
| 203 | - endif() | 195 | + target_link_libraries(${exe} sherpa-onnx-core) |
| 204 | endforeach() | 196 | endforeach() |
| 197 | + | ||
| 198 | + if(NOT WIN32) | ||
| 199 | + foreach(exe IN LISTS main_exes) | ||
| 200 | + target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib") | ||
| 201 | + target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib") | ||
| 202 | + | ||
| 203 | + if(SHERPA_ONNX_ENABLE_PYTHON) | ||
| 204 | + target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib") | ||
| 205 | + endif() | ||
| 206 | + endforeach() | ||
| 207 | + endif() | ||
| 205 | endif() | 208 | endif() |
| 206 | 209 | ||
| 207 | if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32) | 210 | if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32) |
| @@ -214,14 +217,16 @@ if(WIN32 AND BUILD_SHARED_LIBS) | @@ -214,14 +217,16 @@ if(WIN32 AND BUILD_SHARED_LIBS) | ||
| 214 | install(TARGETS sherpa-onnx-core DESTINATION bin) | 217 | install(TARGETS sherpa-onnx-core DESTINATION bin) |
| 215 | endif() | 218 | endif() |
| 216 | 219 | ||
| 217 | -install( | ||
| 218 | - TARGETS | ||
| 219 | - ${main_exes} | ||
| 220 | - DESTINATION | ||
| 221 | - bin | ||
| 222 | -) | 220 | +if(SHERPA_ONNX_ENABLE_BINARY) |
| 221 | + install( | ||
| 222 | + TARGETS | ||
| 223 | + ${main_exes} | ||
| 224 | + DESTINATION | ||
| 225 | + bin | ||
| 226 | + ) | ||
| 227 | +endif() | ||
| 223 | 228 | ||
| 224 | -if(SHERPA_ONNX_HAS_ALSA) | 229 | +if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY) |
| 225 | add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc) | 230 | add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc) |
| 226 | add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc) | 231 | add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc) |
| 227 | 232 | ||
| @@ -261,7 +266,7 @@ if(SHERPA_ONNX_HAS_ALSA) | @@ -261,7 +266,7 @@ if(SHERPA_ONNX_HAS_ALSA) | ||
| 261 | ) | 266 | ) |
| 262 | endif() | 267 | endif() |
| 263 | 268 | ||
| 264 | -if(SHERPA_ONNX_ENABLE_PORTAUDIO) | 269 | +if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY) |
| 265 | add_executable(sherpa-onnx-offline-tts-play | 270 | add_executable(sherpa-onnx-offline-tts-play |
| 266 | sherpa-onnx-offline-tts-play.cc | 271 | sherpa-onnx-offline-tts-play.cc |
| 267 | microphone.cc | 272 | microphone.cc |
| @@ -330,7 +335,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | @@ -330,7 +335,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) | ||
| 330 | ) | 335 | ) |
| 331 | endif() | 336 | endif() |
| 332 | 337 | ||
| 333 | -if(SHERPA_ONNX_ENABLE_WEBSOCKET) | 338 | +if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY) |
| 334 | add_definitions(-DASIO_STANDALONE) | 339 | add_definitions(-DASIO_STANDALONE) |
| 335 | add_definitions(-D_WEBSOCKETPP_CPP11_STL_) | 340 | add_definitions(-D_WEBSOCKETPP_CPP11_STL_) |
| 336 | 341 |
| @@ -16,6 +16,14 @@ | @@ -16,6 +16,14 @@ | ||
| 16 | fprintf(stderr, "\n"); \ | 16 | fprintf(stderr, "\n"); \ |
| 17 | __android_log_print(ANDROID_LOG_WARN, "sherpa-onnx", ##__VA_ARGS__); \ | 17 | __android_log_print(ANDROID_LOG_WARN, "sherpa-onnx", ##__VA_ARGS__); \ |
| 18 | } while (0) | 18 | } while (0) |
| 19 | +#elif SHERPA_ONNX_ENABLE_WASM | ||
| 20 | +#define SHERPA_ONNX_LOGE(...) \ | ||
| 21 | + do { \ | ||
| 22 | + fprintf(stdout, "%s:%s:%d ", __FILE__, __func__, \ | ||
| 23 | + static_cast<int>(__LINE__)); \ | ||
| 24 | + fprintf(stdout, ##__VA_ARGS__); \ | ||
| 25 | + fprintf(stdout, "\n"); \ | ||
| 26 | + } while (0) | ||
| 19 | #else | 27 | #else |
| 20 | #define SHERPA_ONNX_LOGE(...) \ | 28 | #define SHERPA_ONNX_LOGE(...) \ |
| 21 | do { \ | 29 | do { \ |
wasm/CMakeLists.txt
0 → 100644
| 1 | +if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}) | ||
| 2 | + message(FATAL_ERROR "Please use ./build-wasm.sh to build for wasm") | ||
| 3 | +endif() | ||
| 4 | + | ||
| 5 | +if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder_jit_trace-pnnx.ncnn.bin") | ||
| 6 | + message(WARNING "${CMAKE_CURRENT_SOURCE_DIR}/assets/decoder_jit_trace-pnnx.ncnn.bin does not exist") | ||
| 7 | + # message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue") | ||
| 8 | +endif() | ||
| 9 | + | ||
| 10 | +set(exported_functions | ||
| 11 | + MyPrint | ||
| 12 | + SherpaOnnxCreateOfflineTts | ||
| 13 | + SherpaOnnxDestroyOfflineTts | ||
| 14 | + SherpaOnnxDestroyOfflineTtsGeneratedAudio | ||
| 15 | + SherpaOnnxOfflineTtsGenerate | ||
| 16 | + SherpaOnnxOfflineTtsGenerateWithCallback | ||
| 17 | + SherpaOnnxOfflineTtsNumSpeakers | ||
| 18 | + SherpaOnnxOfflineTtsSampleRate | ||
| 19 | + SherpaOnnxWriteWave | ||
| 20 | +) | ||
| 21 | +set(mangled_exported_functions) | ||
| 22 | +foreach(x IN LISTS exported_functions) | ||
| 23 | + list(APPEND mangled_exported_functions "_${x}") | ||
| 24 | +endforeach() | ||
| 25 | +list(JOIN mangled_exported_functions "," all_exported_functions) | ||
| 26 | + | ||
| 27 | + | ||
| 28 | +include_directories(${CMAKE_SOURCE_DIR}) | ||
| 29 | +set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1") | ||
| 30 | +string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB | ||
| 31 | +string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ") | ||
| 32 | +string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ") | ||
| 33 | +string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue'] ") | ||
| 34 | + | ||
| 35 | +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}") | ||
| 36 | +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}") | ||
| 37 | +set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}") | ||
| 38 | + | ||
| 39 | +if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js") | ||
| 40 | + message(FATAL_ERROR "The default suffix for building executables should be .js!") | ||
| 41 | +endif() | ||
| 42 | +# set(CMAKE_EXECUTABLE_SUFFIX ".html") | ||
| 43 | + | ||
| 44 | +add_executable(sherpa-onnx-wasm-main sherpa-onnx-wasm-main.cc) | ||
| 45 | +target_link_libraries(sherpa-onnx-wasm-main sherpa-onnx-c-api) | ||
| 46 | +install(TARGETS sherpa-onnx-wasm-main DESTINATION bin/wasm) | ||
| 47 | + | ||
| 48 | +install( | ||
| 49 | + FILES | ||
| 50 | + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main>/sherpa-onnx-wasm-main.js" | ||
| 51 | + "index.html" | ||
| 52 | + "sherpa-onnx.js" | ||
| 53 | + "app.js" | ||
| 54 | + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main>/sherpa-onnx-wasm-main.wasm" | ||
| 55 | + "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main>/sherpa-onnx-wasm-main.data" | ||
| 56 | + # "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main>/sherpa-onnx-wasm-main.html" | ||
| 57 | + DESTINATION | ||
| 58 | + bin/wasm | ||
| 59 | +) |
wasm/app.js
0 → 100644
| 1 | +const generateBtn = document.getElementById('generateBtn'); | ||
| 2 | +const hint = document.getElementById('hint'); | ||
| 3 | +const speakerIdLabel = document.getElementById('speakerIdLabel'); | ||
| 4 | +const speakerIdInput = document.getElementById('speakerId'); | ||
| 5 | +const speedInput = document.getElementById('speed'); | ||
| 6 | +const speedValue = document.getElementById('speedValue'); | ||
| 7 | +const textArea = document.getElementById('text'); | ||
| 8 | +const soundClips = document.getElementById('sound-clips'); | ||
| 9 | + | ||
| 10 | +speedValue.innerHTML = speedInput.value; | ||
| 11 | + | ||
| 12 | +let index = 0; | ||
| 13 | + | ||
| 14 | + | ||
| 15 | +let tts = null; | ||
| 16 | + | ||
| 17 | +let audioCtx = null; | ||
| 18 | + | ||
| 19 | + | ||
| 20 | +Module = {}; | ||
| 21 | +Module.onRuntimeInitialized = function() { | ||
| 22 | + console.log('Model files downloaded!'); | ||
| 23 | + | ||
| 24 | + console.log('Initializing tts ......'); | ||
| 25 | + tts = initSherpaOnnxOfflineTts() | ||
| 26 | + if (tts.numSpeakers > 1) { | ||
| 27 | + speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`; | ||
| 28 | + } | ||
| 29 | + | ||
| 30 | + hint.innerText = | ||
| 31 | + 'Initialized! Please enter text and click the Generate button.'; | ||
| 32 | + | ||
| 33 | + | ||
| 34 | + | ||
| 35 | + generateBtn.disabled = false; | ||
| 36 | +}; | ||
| 37 | + | ||
| 38 | +speedInput.oninput = function() { | ||
| 39 | + speedValue.innerHTML = this.value; | ||
| 40 | +}; | ||
| 41 | + | ||
| 42 | +generateBtn.onclick = function() { | ||
| 43 | + let speakerId = speakerIdInput.value; | ||
| 44 | + if (speakerId.trim().length == 0) { | ||
| 45 | + alert('Please input a speakerId'); | ||
| 46 | + return; | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + if (!speakerId.match(/^\d+$/)) { | ||
| 50 | + alert(`Input speakerID ${ | ||
| 51 | + speakerId} is not a number.\nPlease enter a number between 0 and ${ | ||
| 52 | + tts.numSpeakers - 1}`); | ||
| 53 | + return; | ||
| 54 | + } | ||
| 55 | + speakerId = parseInt(speakerId, 10); | ||
| 56 | + if (speakerId > tts.numSpeakers - 1) { | ||
| 57 | + alert(`Pleaser enter a number between 0 and ${tts.numSpeakers - 1}`); | ||
| 58 | + return; | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + let text = textArea.value.trim(); | ||
| 62 | + if (text.length == 0) { | ||
| 63 | + alert('Please input a non-blank text'); | ||
| 64 | + return; | ||
| 65 | + } | ||
| 66 | + | ||
| 67 | + console.log('speakerId', speakerId); | ||
| 68 | + console.log('speed', speedInput.value); | ||
| 69 | + console.log('text', text); | ||
| 70 | + | ||
| 71 | + let audio = | ||
| 72 | + tts.generate({text: text, sid: speakerId, speed: speedInput.value}); | ||
| 73 | + | ||
| 74 | + console.log(audio.samples.length, audio.sampleRate); | ||
| 75 | + | ||
| 76 | + if (!audioCtx) { | ||
| 77 | + audioCtx = new AudioContext({sampleRate: tts.sampleRate}); | ||
| 78 | + } | ||
| 79 | + | ||
| 80 | + const buffer = audioCtx.createBuffer(1, audio.samples.length, tts.sampleRate); | ||
| 81 | + | ||
| 82 | + const ptr = buffer.getChannelData(0); | ||
| 83 | + for (let i = 0; i < audio.samples.length; i++) { | ||
| 84 | + ptr[i] = audio.samples[i]; | ||
| 85 | + } | ||
| 86 | + const source = audioCtx.createBufferSource(); | ||
| 87 | + source.buffer = buffer; | ||
| 88 | + source.connect(audioCtx.destination); | ||
| 89 | + source.start(); | ||
| 90 | + | ||
| 91 | + createAudioTag(audio); | ||
| 92 | +}; | ||
| 93 | + | ||
| 94 | +function createAudioTag(generateAudio) { | ||
| 95 | + const blob = toWav(generateAudio.samples, generateAudio.sampleRate); | ||
| 96 | + | ||
| 97 | + const text = textArea.value.trim().substring(0, 100); | ||
| 98 | + const clipName = `${index} ${text} ...`; | ||
| 99 | + index += 1; | ||
| 100 | + | ||
| 101 | + const clipContainer = document.createElement('article'); | ||
| 102 | + const clipLabel = document.createElement('p'); | ||
| 103 | + const audio = document.createElement('audio'); | ||
| 104 | + const deleteButton = document.createElement('button'); | ||
| 105 | + clipContainer.classList.add('clip'); | ||
| 106 | + audio.setAttribute('controls', ''); | ||
| 107 | + deleteButton.textContent = 'Delete'; | ||
| 108 | + deleteButton.className = 'delete'; | ||
| 109 | + | ||
| 110 | + clipLabel.textContent = clipName; | ||
| 111 | + | ||
| 112 | + clipContainer.appendChild(audio); | ||
| 113 | + | ||
| 114 | + clipContainer.appendChild(clipLabel); | ||
| 115 | + clipContainer.appendChild(deleteButton); | ||
| 116 | + soundClips.appendChild(clipContainer); | ||
| 117 | + | ||
| 118 | + audio.controls = true; | ||
| 119 | + | ||
| 120 | + const audioURL = window.URL.createObjectURL(blob); | ||
| 121 | + audio.src = audioURL; | ||
| 122 | + | ||
| 123 | + deleteButton.onclick = function(e) { | ||
| 124 | + let evtTgt = e.target; | ||
| 125 | + evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode); | ||
| 126 | + }; | ||
| 127 | + | ||
| 128 | + clipLabel.onclick = function() { | ||
| 129 | + const existingName = clipLabel.textContent; | ||
| 130 | + const newClipName = prompt('Enter a new name for your sound clip?'); | ||
| 131 | + if (newClipName === null) { | ||
| 132 | + clipLabel.textContent = existingName; | ||
| 133 | + } else { | ||
| 134 | + clipLabel.textContent = newClipName; | ||
| 135 | + } | ||
| 136 | + }; | ||
| 137 | +} | ||
| 138 | + | ||
| 139 | +// this function is copied/modified from | ||
| 140 | +// https://gist.github.com/meziantou/edb7217fddfbb70e899e | ||
| 141 | +function toWav(floatSamples, sampleRate) { | ||
| 142 | + let samples = new Int16Array(floatSamples.length); | ||
| 143 | + for (let i = 0; i < samples.length; ++i) { | ||
| 144 | + let s = floatSamples[i]; | ||
| 145 | + if (s >= 1) | ||
| 146 | + s = 1; | ||
| 147 | + else if (s <= -1) | ||
| 148 | + s = -1; | ||
| 149 | + | ||
| 150 | + samples[i] = s * 32767; | ||
| 151 | + } | ||
| 152 | + | ||
| 153 | + let buf = new ArrayBuffer(44 + samples.length * 2); | ||
| 154 | + var view = new DataView(buf); | ||
| 155 | + | ||
| 156 | + // http://soundfile.sapp.org/doc/WaveFormat/ | ||
| 157 | + // F F I R | ||
| 158 | + view.setUint32(0, 0x46464952, true); // chunkID | ||
| 159 | + view.setUint32(4, 36 + samples.length * 2, true); // chunkSize | ||
| 160 | + // E V A W | ||
| 161 | + view.setUint32(8, 0x45564157, true); // format | ||
| 162 | + // | ||
| 163 | + // t m f | ||
| 164 | + view.setUint32(12, 0x20746d66, true); // subchunk1ID | ||
| 165 | + view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM | ||
| 166 | + view.setUint32(20, 1, true); // audioFormat, 1 for PCM | ||
| 167 | + view.setUint16(22, 1, true); // numChannels: 1 channel | ||
| 168 | + view.setUint32(24, sampleRate, true); // sampleRate | ||
| 169 | + view.setUint32(28, sampleRate * 2, true); // byteRate | ||
| 170 | + view.setUint16(32, 2, true); // blockAlign | ||
| 171 | + view.setUint16(34, 16, true); // bitsPerSample | ||
| 172 | + view.setUint32(36, 0x61746164, true); // Subchunk2ID | ||
| 173 | + view.setUint32(40, samples.length * 2, true); // subchunk2Size | ||
| 174 | + | ||
| 175 | + let offset = 44; | ||
| 176 | + for (let i = 0; i < samples.length; ++i) { | ||
| 177 | + view.setInt16(offset, samples[i], true); | ||
| 178 | + offset += 2; | ||
| 179 | + } | ||
| 180 | + | ||
| 181 | + return new Blob([view], {type: 'audio/wav'}); | ||
| 182 | +} |
wasm/assets/.gitignore
0 → 100644
wasm/assets/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +Please refer to | ||
| 4 | +https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models | ||
| 5 | +to download a model. | ||
| 6 | + | ||
| 7 | +The following is an example: | ||
| 8 | +``` | ||
| 9 | +cd sherpa-onnx/wasm/assets | ||
| 10 | + | ||
| 11 | +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 12 | +tar xf vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 13 | +rm vits-piper-en_US-libritts_r-medium.tar.bz2 | ||
| 14 | +mv vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx ./model.onnx | ||
| 15 | +mv vits-piper-en_US-libritts_r-medium/tokens.txt ./ | ||
| 16 | +mv vits-piper-en_US-libritts_r-medium/espeak-ng-data ./ | ||
| 17 | +rm -rf vits-piper-en_US-libritts_r-medium | ||
| 18 | +``` | ||
| 19 | + | ||
| 20 | +You should have the following files in `assets` before you can run | ||
| 21 | +`build-wasm-simd.sh` | ||
| 22 | + | ||
| 23 | +``` | ||
| 24 | +assets fangjun$ tree -L 1 | ||
| 25 | +. | ||
| 26 | +├── README.md | ||
| 27 | +├── espeak-ng-data | ||
| 28 | +├── model.onnx | ||
| 29 | +└── tokens.txt | ||
| 30 | + | ||
| 31 | +1 directory, 3 files | ||
| 32 | +``` |
wasm/index.html
0 → 100644
| 1 | +<html lang="en"> | ||
| 2 | + | ||
| 3 | +<head> | ||
| 4 | + <meta charset="utf-8"> | ||
| 5 | + <meta name="viewport" content="width=device-width" /> | ||
| 6 | + <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title> | ||
| 7 | + <style> | ||
| 8 | + h1,div { | ||
| 9 | + text-align: center; | ||
| 10 | + } | ||
| 11 | + textarea { | ||
| 12 | + width:100%; | ||
| 13 | + } | ||
| 14 | + </style> | ||
| 15 | +</head> | ||
| 16 | + | ||
| 17 | +<body> | ||
| 18 | + <h1> | ||
| 19 | + Next-gen Kaldi + WebAssembly<br/> | ||
| 20 | + Text-to-speech Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a> | ||
| 21 | + </h1> | ||
| 22 | + <div> | ||
| 23 | + <span id="hint">Loading model ... ...</span> | ||
| 24 | + <br/> | ||
| 25 | + <br/> | ||
| 26 | + <label for="speakerId" id="speakerIdLabel">Speaker ID: </label> | ||
| 27 | + <input type="text" id="speakerId" name="speakerId" value="0" /> | ||
| 28 | + <br/> | ||
| 29 | + <br/> | ||
| 30 | + <label for="speed" id="speedLabel">Speed: </label> | ||
| 31 | + <input type="range" id="speed" name="speed" min="0.4" max="3.5" step="0.1" value="1.0" /> | ||
| 32 | + <span id="speedValue"></span> | ||
| 33 | + <br/> | ||
| 34 | + <br/> | ||
| 35 | + <textarea id="text" rows="10" placeholder="Please enter your text here and click the Generate button"></textarea> | ||
| 36 | + <br/> | ||
| 37 | + <br/> | ||
| 38 | + <button id="generateBtn" disabled>Generate</button> | ||
| 39 | + </div> | ||
| 40 | + <section flex="1" overflow="auto" id="sound-clips"> | ||
| 41 | + </section> | ||
| 42 | + | ||
| 43 | + <script src="app.js"></script> | ||
| 44 | + <script src="sherpa-onnx.js"></script> | ||
| 45 | + <script src="sherpa-onnx-wasm-main.js"></script> | ||
| 46 | +</body> |
wasm/sherpa-onnx-wasm-main.cc
0 → 100644
| 1 | +// wasm/sherpa-onnx-wasm-main.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | +#include <stdio.h> | ||
| 5 | + | ||
| 6 | +#include <algorithm> | ||
| 7 | +#include <memory> | ||
| 8 | + | ||
| 9 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 10 | + | ||
| 11 | +// see also | ||
| 12 | +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html | ||
| 13 | + | ||
| 14 | +extern "C" { | ||
| 15 | + | ||
| 16 | +static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 7 * 4, ""); | ||
| 17 | +static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == | ||
| 18 | + sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4, | ||
| 19 | + ""); | ||
| 20 | +static_assert(sizeof(SherpaOnnxOfflineTtsConfig) == | ||
| 21 | + sizeof(SherpaOnnxOfflineTtsModelConfig) + 2 * 4, | ||
| 22 | + ""); | ||
| 23 | + | ||
| 24 | +void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { | ||
| 25 | + auto tts_model_config = &tts_config->model; | ||
| 26 | + auto vits_model_config = &tts_model_config->vits; | ||
| 27 | + fprintf(stdout, "----------vits model config----------\n"); | ||
| 28 | + fprintf(stdout, "model: %s\n", vits_model_config->model); | ||
| 29 | + fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon); | ||
| 30 | + fprintf(stdout, "tokens: %s\n", vits_model_config->tokens); | ||
| 31 | + fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir); | ||
| 32 | + fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale); | ||
| 33 | + fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w); | ||
| 34 | + fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale); | ||
| 35 | + | ||
| 36 | + fprintf(stdout, "----------tts model config----------\n"); | ||
| 37 | + fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads); | ||
| 38 | + fprintf(stdout, "debug: %d\n", tts_model_config->debug); | ||
| 39 | + fprintf(stdout, "provider: %s\n", tts_model_config->provider); | ||
| 40 | + | ||
| 41 | + fprintf(stdout, "----------tts config----------\n"); | ||
| 42 | + fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts); | ||
| 43 | + fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences); | ||
| 44 | +} | ||
| 45 | + | ||
| 46 | +void CopyHeap(const char *src, int32_t num_bytes, char *dst) { | ||
| 47 | + std::copy(src, src + num_bytes, dst); | ||
| 48 | +} | ||
| 49 | +} |
wasm/sherpa-onnx.js
0 → 100644
| 1 | + | ||
| 2 | +function freeConfig(config) { | ||
| 3 | + if ('buffer' in config) { | ||
| 4 | + _free(config.buffer); | ||
| 5 | + } | ||
| 6 | + | ||
| 7 | + if ('config' in config) { | ||
| 8 | + freeConfig(config.config) | ||
| 9 | + } | ||
| 10 | + | ||
| 11 | + _free(config.ptr); | ||
| 12 | +} | ||
| 13 | + | ||
| 14 | +// The user should free the returned pointers | ||
| 15 | +function initSherpaOnnxOfflineTtsVitsModelConfig(config) { | ||
| 16 | + let modelLen = lengthBytesUTF8(config.model) + 1; | ||
| 17 | + let lexiconLen = lengthBytesUTF8(config.lexicon) + 1; | ||
| 18 | + let tokensLen = lengthBytesUTF8(config.tokens) + 1; | ||
| 19 | + let dataDirLen = lengthBytesUTF8(config.dataDir) + 1; | ||
| 20 | + | ||
| 21 | + let n = modelLen + lexiconLen + tokensLen + dataDirLen; | ||
| 22 | + | ||
| 23 | + let buffer = _malloc(n); | ||
| 24 | + | ||
| 25 | + let len = 7 * 4; | ||
| 26 | + let ptr = _malloc(len); | ||
| 27 | + | ||
| 28 | + let offset = 0; | ||
| 29 | + stringToUTF8(config.model, buffer + offset, modelLen); | ||
| 30 | + offset += modelLen; | ||
| 31 | + | ||
| 32 | + stringToUTF8(config.lexicon, buffer + offset, lexiconLen); | ||
| 33 | + offset += lexiconLen; | ||
| 34 | + | ||
| 35 | + stringToUTF8(config.tokens, buffer + offset, tokensLen); | ||
| 36 | + offset += tokensLen; | ||
| 37 | + | ||
| 38 | + stringToUTF8(config.dataDir, buffer + offset, dataDirLen); | ||
| 39 | + offset += dataDirLen; | ||
| 40 | + | ||
| 41 | + offset = 0; | ||
| 42 | + setValue(ptr, buffer + offset, 'i8*'); | ||
| 43 | + offset += modelLen; | ||
| 44 | + | ||
| 45 | + setValue(ptr + 4, buffer + offset, 'i8*'); | ||
| 46 | + offset += lexiconLen; | ||
| 47 | + | ||
| 48 | + setValue(ptr + 8, buffer + offset, 'i8*'); | ||
| 49 | + offset += tokensLen; | ||
| 50 | + | ||
| 51 | + setValue(ptr + 12, buffer + offset, 'i8*'); | ||
| 52 | + offset += dataDirLen; | ||
| 53 | + | ||
| 54 | + setValue(ptr + 16, config.noiseScale, 'float'); | ||
| 55 | + setValue(ptr + 20, config.noiseScaleW, 'float'); | ||
| 56 | + setValue(ptr + 24, config.lengthScale, 'float'); | ||
| 57 | + | ||
| 58 | + return { | ||
| 59 | + buffer: buffer, ptr: ptr, len: len, | ||
| 60 | + } | ||
| 61 | +} | ||
| 62 | + | ||
| 63 | +function initSherpaOnnxOfflineTtsModelConfig(config) { | ||
| 64 | + let vitsModelConfig = | ||
| 65 | + initSherpaOnnxOfflineTtsVitsModelConfig(config.offlineTtsVitsModelConfig); | ||
| 66 | + | ||
| 67 | + let len = vitsModelConfig.len + 3 * 4; | ||
| 68 | + let ptr = _malloc(len); | ||
| 69 | + | ||
| 70 | + let offset = 0; | ||
| 71 | + _CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset); | ||
| 72 | + offset += vitsModelConfig.len; | ||
| 73 | + | ||
| 74 | + setValue(ptr + offset, config.numThreads, 'i32'); | ||
| 75 | + offset += 4; | ||
| 76 | + | ||
| 77 | + setValue(ptr + offset, config.debug, 'i32'); | ||
| 78 | + offset += 4; | ||
| 79 | + | ||
| 80 | + let providerLen = lengthBytesUTF8(config.provider) + 1; | ||
| 81 | + let buffer = _malloc(providerLen); | ||
| 82 | + stringToUTF8(config.provider, buffer, providerLen); | ||
| 83 | + setValue(ptr + offset, buffer, 'i8*'); | ||
| 84 | + | ||
| 85 | + return { | ||
| 86 | + buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig, | ||
| 87 | + } | ||
| 88 | +} | ||
| 89 | + | ||
| 90 | +function initSherpaOnnxOfflineTtsConfig(config) { | ||
| 91 | + let modelConfig = | ||
| 92 | + initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig); | ||
| 93 | + let len = modelConfig.len + 2 * 4; | ||
| 94 | + let ptr = _malloc(len); | ||
| 95 | + | ||
| 96 | + let offset = 0; | ||
| 97 | + _CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset); | ||
| 98 | + offset += modelConfig.len; | ||
| 99 | + | ||
| 100 | + let ruleFstsLen = lengthBytesUTF8(config.ruleFsts) + 1; | ||
| 101 | + let buffer = _malloc(ruleFstsLen); | ||
| 102 | + stringToUTF8(config.ruleFsts, buffer, ruleFstsLen); | ||
| 103 | + setValue(ptr + offset, buffer, 'i8*'); | ||
| 104 | + offset += 4; | ||
| 105 | + | ||
| 106 | + setValue(ptr + offset, config.maxNumSentences, 'i32'); | ||
| 107 | + | ||
| 108 | + return { | ||
| 109 | + buffer: buffer, ptr: ptr, len: len, config: modelConfig, | ||
| 110 | + } | ||
| 111 | +} | ||
| 112 | + | ||
| 113 | +class OfflineTts { | ||
| 114 | + constructor(configObj) { | ||
| 115 | + let config = initSherpaOnnxOfflineTtsConfig(configObj) | ||
| 116 | + let handle = _SherpaOnnxCreateOfflineTts(config.ptr); | ||
| 117 | + | ||
| 118 | + freeConfig(config); | ||
| 119 | + | ||
| 120 | + this.handle = handle; | ||
| 121 | + this.sampleRate = _SherpaOnnxOfflineTtsSampleRate(this.handle); | ||
| 122 | + this.numSpeakers = _SherpaOnnxOfflineTtsNumSpeakers(this.handle); | ||
| 123 | + } | ||
| 124 | + | ||
| 125 | + free() { | ||
| 126 | + _SherpaOnnxDestroyOfflineTts(this.handle); | ||
| 127 | + this.handle = 0 | ||
| 128 | + } | ||
| 129 | + | ||
| 130 | + // { | ||
| 131 | + // text: "hello", | ||
| 132 | + // sid: 1, | ||
| 133 | + // speed: 1.0 | ||
| 134 | + // } | ||
| 135 | + generate(config) { | ||
| 136 | + let textLen = lengthBytesUTF8(config.text) + 1; | ||
| 137 | + let textPtr = _malloc(textLen); | ||
| 138 | + stringToUTF8(config.text, textPtr, textLen); | ||
| 139 | + | ||
| 140 | + let h = _SherpaOnnxOfflineTtsGenerate( | ||
| 141 | + this.handle, textPtr, config.sid, config.speed); | ||
| 142 | + | ||
| 143 | + let numSamples = HEAP32[h / 4 + 1]; | ||
| 144 | + let sampleRate = HEAP32[h / 4 + 2]; | ||
| 145 | + | ||
| 146 | + let samplesPtr = HEAP32[h / 4] / 4; | ||
| 147 | + let samples = new Float32Array(numSamples); | ||
| 148 | + for (let i = 0; i < numSamples; i++) { | ||
| 149 | + samples[i] = HEAPF32[samplesPtr + i]; | ||
| 150 | + } | ||
| 151 | + | ||
| 152 | + _SherpaOnnxDestroyOfflineTtsGeneratedAudio(h); | ||
| 153 | + return {samples: samples, sampleRate: sampleRate}; | ||
| 154 | + } | ||
| 155 | +} | ||
| 156 | + | ||
| 157 | +function initSherpaOnnxOfflineTts() { | ||
| 158 | + let offlineTtsVitsModelConfig = { | ||
| 159 | + model: './model.onnx', | ||
| 160 | + lexicon: '', | ||
| 161 | + tokens: './tokens.txt', | ||
| 162 | + dataDir: './espeak-ng-data', | ||
| 163 | + noiseScale: 0.667, | ||
| 164 | + noiseScaleW: 0.8, | ||
| 165 | + lengthScale: 1.0, | ||
| 166 | + }; | ||
| 167 | + let offlineTtsModelConfig = { | ||
| 168 | + offlineTtsVitsModelConfig: offlineTtsVitsModelConfig, | ||
| 169 | + numThreads: 1, | ||
| 170 | + debug: 1, | ||
| 171 | + provider: 'cpu', | ||
| 172 | + }; | ||
| 173 | + let offlineTtsConfig = { | ||
| 174 | + offlineTtsModelConfig: offlineTtsModelConfig, | ||
| 175 | + ruleFsts: '', | ||
| 176 | + maxNumSentences: 1, | ||
| 177 | + } | ||
| 178 | + | ||
| 179 | + return new OfflineTts(offlineTtsConfig); | ||
| 180 | +} |
-
请 注册 或 登录 后发表评论