Fangjun Kuang
Committed by GitHub

Support WebAssembly for text-to-speech (#577)

# Builds the WebAssembly (SIMD) English text-to-speech demo with emscripten,
# uploads the result as a workflow artifact, and publishes it to the
# k2-fsa/web-assembly-tts-sherpa-onnx-en Hugging Face Space.
name: wasm-simd-hf-space-en

on:
  push:
    branches:
      - wasm-2
  release:
    types:
      - published

  workflow_dispatch:

# Only one run per ref at a time; a newer run cancels the older one.
concurrency:
  group: wasm-simd-hf-space-en-${{ github.ref }}
  cancel-in-progress: true

jobs:
  wasm-simd-hf-space-en:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14

      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check

      # The model, its token table and the espeak-ng data are placed directly
      # in wasm/assets under fixed names (model.onnx, tokens.txt, espeak-ng-data).
      - name: Download model files
        shell: bash
        run: |
          cd wasm/assets
          ls -lh
          echo "----------"
          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
          tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
          rm vits-piper-en_US-libritts_r-medium.tar.bz2
          mv vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx ./model.onnx
          mv vits-piper-en_US-libritts_r-medium/tokens.txt ./
          mv vits-piper-en_US-libritts_r-medium/espeak-ng-data ./
          rm -rf vits-piper-en_US-libritts_r-medium

          ls -lh

      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        run: |
          ./build-wasm-simd.sh

      # Rename the install directory after the project version and archive it.
      - name: collect files
        shell: bash
        run: |
          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)

          mv build-wasm-simd/install/bin/wasm sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en
          ls -lh sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en
          tar cjfv sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en.tar.bz2 ./sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en

      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-en
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2

      # Each attempt starts from a fresh clone, so a failed attempt leaves no
      # state behind for the next one.
      - name: Publish to huggingface
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v2
        with:
          max_attempts: 20
          timeout_seconds: 200
          shell: bash
          command: |
            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2)

            git config --global user.email "csukuangfj@gmail.com"
            git config --global user.name "Fangjun Kuang"

            rm -rf huggingface
            export GIT_LFS_SKIP_SMUDGE=1

            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en huggingface
            cd huggingface
            git fetch
            git pull
            git merge -m "merge remote" --ff origin main

            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-en/* .

            git status
            git lfs track "*.data"
            git lfs track "*.wasm"
            ls -lh

            git add .

            # `git commit` exits non-zero when nothing is staged. Without this
            # guard an unchanged build would be retried 20 times and then fail.
            if git diff --cached --quiet; then
              echo "Nothing changed; skipping commit and push"
            else
              git commit -m "update model"
              git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en main
            fi
@@ -20,6 +20,8 @@ option(SHERPA_ONNX_ENABLE_JNI "Whether to build JNI internface" OFF) @@ -20,6 +20,8 @@ option(SHERPA_ONNX_ENABLE_JNI "Whether to build JNI internface" OFF)
20 option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON) 20 option(SHERPA_ONNX_ENABLE_C_API "Whether to build C API" ON)
21 option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" ON) 21 option(SHERPA_ONNX_ENABLE_WEBSOCKET "Whether to build webscoket server/client" ON)
22 option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF) 22 option(SHERPA_ONNX_ENABLE_GPU "Enable ONNX Runtime GPU support" OFF)
  23 +option(SHERPA_ONNX_ENABLE_WASM "Whether to enable WASM" OFF)
  24 +option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
23 option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON) 25 option(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY "True to link libstdc++ statically. Used only when BUILD_SHARED_LIBS is OFF on Linux" ON)
24 26
25 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib") 27 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/lib")
@@ -99,6 +101,10 @@ message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}") @@ -99,6 +101,10 @@ message(STATUS "SHERPA_ONNX_ENABLE_JNI ${SHERPA_ONNX_ENABLE_JNI}")
99 message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}") 101 message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
100 message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}") 102 message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
101 message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") 103 message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
  104 +message(STATUS "SHERPA_ONNX_ENABLE_WASM ${SHERPA_ONNX_ENABLE_WASM}")
  105 +if(SHERPA_ONNX_ENABLE_WASM)
  106 + add_definitions(-DSHERPA_ONNX_ENABLE_WASM=1)
  107 +endif()
102 108
103 if(NOT CMAKE_CXX_STANDARD) 109 if(NOT CMAKE_CXX_STANDARD)
104 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") 110 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
@@ -109,7 +115,7 @@ message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}") @@ -109,7 +115,7 @@ message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
109 115
110 include(CheckIncludeFileCXX) 116 include(CheckIncludeFileCXX)
111 117
112 -if(UNIX AND NOT APPLE) 118 +if(UNIX AND NOT APPLE AND NOT SHERPA_ONNX_ENABLE_WASM)
113 check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA) 119 check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
114 if(SHERPA_ONNX_HAS_ALSA) 120 if(SHERPA_ONNX_HAS_ALSA)
115 add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1) 121 add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)
@@ -160,6 +166,11 @@ endif() @@ -160,6 +166,11 @@ endif()
160 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) 166 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
161 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) 167 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
162 168
  169 +if(SHERPA_ONNX_ENABLE_WASM)
  170 + # Enable it for debugging in case there is something wrong.
  171 + # string(APPEND CMAKE_CXX_FLAGS " -g4 -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=1 ")
  172 +endif()
  173 +
163 if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux) 174 if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux)
164 if(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY) 175 if(SHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY)
165 message(STATUS "Link libstdc++ statically") 176 message(STATUS "Link libstdc++ statically")
@@ -200,9 +211,14 @@ include(piper-phonemize) @@ -200,9 +211,14 @@ include(piper-phonemize)
200 211
201 add_subdirectory(sherpa-onnx) 212 add_subdirectory(sherpa-onnx)
202 213
203 -if(SHERPA_ONNX_ENABLE_C_API) 214 +if(SHERPA_ONNX_ENABLE_C_API AND SHERPA_ONNX_ENABLE_BINARY)
204 add_subdirectory(c-api-examples) 215 add_subdirectory(c-api-examples)
205 endif() 216 endif()
  217 +
  218 +if(SHERPA_ONNX_ENABLE_WASM)
  219 + add_subdirectory(wasm)
  220 +endif()
  221 +
206 message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") 222 message(STATUS "CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
207 223
208 if(NOT BUILD_SHARED_LIBS) 224 if(NOT BUILD_SHARED_LIBS)
#!/usr/bin/env bash
# Copyright (c) 2024 Xiaomi Corporation
#
# This script is to build sherpa-onnx for WebAssembly
#
# It locates the emscripten installation (from $EMSCRIPTEN, or from the
# directory containing `emcc` on PATH), configures a static Release build in
# ./build-wasm-simd with the emscripten toolchain file, then builds and
# installs into ./build-wasm-simd/install.

set -ex

if [ -z "$EMSCRIPTEN" ]; then
  if ! command -v emcc &> /dev/null; then
    echo "Please install emscripten first"
    echo ""
    echo "You can use the following commands to install it:"
    echo ""
    echo "git clone https://github.com/emscripten-core/emsdk.git"
    echo "cd emsdk"
    echo "git pull"
    echo "./emsdk install latest"
    echo "./emsdk activate latest"
    echo "source ./emsdk_env.sh"
    exit 1
  else
    # Every level is quoted so an install path containing whitespace
    # survives word splitting.
    EMSCRIPTEN="$(dirname "$(realpath "$(command -v emcc)")")"
  fi
fi

export EMSCRIPTEN="$EMSCRIPTEN"
echo "EMSCRIPTEN: $EMSCRIPTEN"
if [ ! -f "$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" ]; then
  echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
  echo "Please make sure you have installed emsdk correctly"
  exit 1
fi

mkdir -p build-wasm-simd
pushd build-wasm-simd

# Exported for the cmake run below; the wasm CMakeLists.txt reads it from the
# environment to check that the build was started through this script.
export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON

cmake \
  -DCMAKE_INSTALL_PREFIX=./install \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_TOOLCHAIN_FILE="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" \
  \
  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  -DBUILD_SHARED_LIBS=OFF \
  -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  -DSHERPA_ONNX_ENABLE_JNI=OFF \
  -DSHERPA_ONNX_ENABLE_C_API=ON \
  -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
  -DSHERPA_ONNX_ENABLE_GPU=OFF \
  -DSHERPA_ONNX_ENABLE_WASM=ON \
  -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
  ..
make -j2
make install
1 function(download_espeak_ng_for_piper) 1 function(download_espeak_ng_for_piper)
2 include(FetchContent) 2 include(FetchContent)
3 3
4 - set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip")  
5 - set(espeak_ng_URL2 "")  
6 - set(espeak_ng_HASH "SHA256=8a48251e6926133dd91fcf6cb210c7c2e290a9b578d269446e2d32d710b0dfa0") 4 + set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/69bf6927964fb042aeb827cfdf6082a30f5802eb.zip")
  5 + set(espeak_ng_URL2 "https://hub.nuaa.cf/csukuangfj/espeak-ng/archive/69bf6927964fb042aeb827cfdf6082a30f5802eb.zip")
  6 + set(espeak_ng_HASH "SHA256=745e35b21ece6804b4a1839722f9e625ac909380c8f85873ad71bf145877075a")
7 7
8 set(BUILD_ESPEAK_NG_TESTS OFF CACHE BOOL "" FORCE) 8 set(BUILD_ESPEAK_NG_TESTS OFF CACHE BOOL "" FORCE)
9 set(USE_ASYNC OFF CACHE BOOL "" FORCE) 9 set(USE_ASYNC OFF CACHE BOOL "" FORCE)
@@ -15,14 +15,18 @@ function(download_espeak_ng_for_piper) @@ -15,14 +15,18 @@ function(download_espeak_ng_for_piper)
15 set(EXTRA_cmn ON CACHE BOOL "" FORCE) 15 set(EXTRA_cmn ON CACHE BOOL "" FORCE)
16 set(EXTRA_ru ON CACHE BOOL "" FORCE) 16 set(EXTRA_ru ON CACHE BOOL "" FORCE)
17 17
  18 + if(SHERPA_ONNX_ENABLE_WASM)
  19 + set(BUILD_ESPEAK_NG_EXE OFF CACHE BOOL "" FORCE)
  20 + endif()
  21 +
18 # If you don't have access to the Internet, 22 # If you don't have access to the Internet,
19 # please pre-download espeak-ng 23 # please pre-download espeak-ng
20 set(possible_file_locations 24 set(possible_file_locations
21 - $ENV{HOME}/Downloads/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip  
22 - ${CMAKE_SOURCE_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip  
23 - ${CMAKE_BINARY_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip  
24 - /tmp/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip  
25 - /star-fj/fangjun/download/github/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip 25 + $ENV{HOME}/Downloads/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip
  26 + ${CMAKE_SOURCE_DIR}/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip
  27 + ${CMAKE_BINARY_DIR}/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip
  28 + /tmp/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip
  29 + /star-fj/fangjun/download/github/espeak-ng-69bf6927964fb042aeb827cfdf6082a30f5802eb.zip
26 ) 30 )
27 31
28 foreach(f IN LISTS possible_file_locations) 32 foreach(f IN LISTS possible_file_locations)
1 function(download_kaldi_decoder) 1 function(download_kaldi_decoder)
2 include(FetchContent) 2 include(FetchContent)
3 3
4 - set(kaldi_decoder_URL "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.3.tar.gz")  
5 - set(kaldi_decoder_URL2 "https://huggingface.co/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-decoder-0.2.3.tar.gz")  
6 - set(kaldi_decoder_HASH "SHA256=98bf445a5b7961ccf3c3522317d900054eaadb6a9cdcf4531e7d9caece94a56d") 4 + set(kaldi_decoder_URL "https://github.com/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.4.tar.gz")
  5 + set(kaldi_decoder_URL2 "https://hub.nuaa.cf/k2-fsa/kaldi-decoder/archive/refs/tags/v0.2.4.tar.gz")
  6 + set(kaldi_decoder_HASH "SHA256=136d96c2f1f8ec44de095205f81a6ce98981cd867fe4ba840f9415a0b58fe601")
7 7
8 set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE) 8 set(KALDI_DECODER_BUILD_PYTHON OFF CACHE BOOL "" FORCE)
9 set(KALDI_DECODER_ENABLE_TESTS OFF CACHE BOOL "" FORCE) 9 set(KALDI_DECODER_ENABLE_TESTS OFF CACHE BOOL "" FORCE)
@@ -12,11 +12,11 @@ function(download_kaldi_decoder) @@ -12,11 +12,11 @@ function(download_kaldi_decoder)
12 # If you don't have access to the Internet, 12 # If you don't have access to the Internet,
13 # please pre-download kaldi-decoder 13 # please pre-download kaldi-decoder
14 set(possible_file_locations 14 set(possible_file_locations
15 - $ENV{HOME}/Downloads/kaldi-decoder-0.2.3.tar.gz  
16 - ${CMAKE_SOURCE_DIR}/kaldi-decoder-0.2.3.tar.gz  
17 - ${CMAKE_BINARY_DIR}/kaldi-decoder-0.2.3.tar.gz  
18 - /tmp/kaldi-decoder-0.2.3.tar.gz  
19 - /star-fj/fangjun/download/github/kaldi-decoder-0.2.3.tar.gz 15 + $ENV{HOME}/Downloads/kaldi-decoder-0.2.4.tar.gz
  16 + ${CMAKE_SOURCE_DIR}/kaldi-decoder-0.2.4.tar.gz
  17 + ${CMAKE_BINARY_DIR}/kaldi-decoder-0.2.4.tar.gz
  18 + /tmp/kaldi-decoder-0.2.4.tar.gz
  19 + /star-fj/fangjun/download/github/kaldi-decoder-0.2.4.tar.gz
20 ) 20 )
21 21
22 foreach(f IN LISTS possible_file_locations) 22 foreach(f IN LISTS possible_file_locations)
1 function(download_kaldifst) 1 function(download_kaldifst)
2 include(FetchContent) 2 include(FetchContent)
3 3
4 - set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.9.tar.gz")  
5 - set(kaldifst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/kaldifst-1.7.9.tar.gz")  
6 - set(kaldifst_HASH "SHA256=8c653021491dca54c38ab659565edfab391418a79ae87099257863cd5664dd39") 4 + set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.10.tar.gz")
  5 + set(kaldifst_URL2 "https://hub.nuaa.cf/k2-fsa/kaldifst/archive/refs/tags/v1.7.10.tar.gz")
  6 + set(kaldifst_HASH "SHA256=7f7b3173a6584a6b1987f65ae7af2ac453d66b845f875a9d31074b8d2cd0de54")
7 7
8 # If you don't have access to the Internet, 8 # If you don't have access to the Internet,
9 # please pre-download kaldifst 9 # please pre-download kaldifst
10 set(possible_file_locations 10 set(possible_file_locations
11 - $ENV{HOME}/Downloads/kaldifst-1.7.9.tar.gz  
12 - ${CMAKE_SOURCE_DIR}/kaldifst-1.7.9.tar.gz  
13 - ${CMAKE_BINARY_DIR}/kaldifst-1.7.9.tar.gz  
14 - /tmp/kaldifst-1.7.9.tar.gz  
15 - /star-fj/fangjun/download/github/kaldifst-1.7.9.tar.gz 11 + $ENV{HOME}/Downloads/kaldifst-1.7.10.tar.gz
  12 + ${CMAKE_SOURCE_DIR}/kaldifst-1.7.10.tar.gz
  13 + ${CMAKE_BINARY_DIR}/kaldifst-1.7.10.tar.gz
  14 + /tmp/kaldifst-1.7.10.tar.gz
  15 + /star-fj/fangjun/download/github/kaldifst-1.7.10.tar.gz
16 ) 16 )
17 17
18 foreach(f IN LISTS possible_file_locations) 18 foreach(f IN LISTS possible_file_locations)
# Copyright (c) 2022-2024 Xiaomi Corporation
#
# Fetches the prebuilt static onnxruntime 1.17.0 library for WebAssembly
# (SIMD) and publishes the list of its archives as `onnxruntime_lib_files`.
#
# This file sets a variable with PARENT_SCOPE, so it must be include()d from
# inside a function (download_onnxruntime does so when
# SHERPA_ONNX_ENABLE_WASM is ON).
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")

if(NOT SHERPA_ONNX_ENABLE_WASM)
  message(FATAL_ERROR "This file is for WebAssembly.")
endif()

if(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "BUILD_SHARED_LIBS should be OFF for WebAssembly")
endif()

# Idempotent; keeps this module independent of whether the includer has
# already loaded FetchContent.
include(FetchContent)

set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.17.0/onnxruntime-wasm-static_lib-simd-1.17.0.zip")
set(onnxruntime_URL2 "https://hub.nuaa.cf/csukuangfj/onnxruntime-libs/releases/download/v1.17.0/onnxruntime-wasm-static_lib-simd-1.17.0.zip")
set(onnxruntime_HASH "SHA256=0ee6120d2ade093eff731af792fd137ac2db580eb2dc5b8bf39e0897b0d7afd9")

# If you don't have access to the Internet,
# please download onnxruntime to one of the following locations.
# You can add more if you want.
set(possible_file_locations
  $ENV{HOME}/Downloads/onnxruntime-wasm-static_lib-simd-1.17.0.zip
  ${CMAKE_SOURCE_DIR}/onnxruntime-wasm-static_lib-simd-1.17.0.zip
  ${CMAKE_BINARY_DIR}/onnxruntime-wasm-static_lib-simd-1.17.0.zip
  /tmp/onnxruntime-wasm-static_lib-simd-1.17.0.zip
  /star-fj/fangjun/download/github/onnxruntime-wasm-static_lib-simd-1.17.0.zip
)

# The first local copy found replaces the primary URL; the mirror is dropped
# because no download is needed in that case.
foreach(f IN LISTS possible_file_locations)
  if(EXISTS "${f}")
    set(onnxruntime_URL "${f}")
    file(TO_CMAKE_PATH "${onnxruntime_URL}" onnxruntime_URL)
    message(STATUS "Found local downloaded onnxruntime: ${onnxruntime_URL}")
    set(onnxruntime_URL2)
    break()
  endif()
endforeach()

# onnxruntime_URL2 is left unquoted on purpose: when it is unset it expands to
# nothing instead of an empty URL entry.
FetchContent_Declare(onnxruntime
  URL
    ${onnxruntime_URL}
    ${onnxruntime_URL2}
  URL_HASH ${onnxruntime_HASH}
)

FetchContent_GetProperties(onnxruntime)
if(NOT onnxruntime_POPULATED)
  message(STATUS "Downloading onnxruntime from ${onnxruntime_URL}")
  FetchContent_Populate(onnxruntime)
endif()
message(STATUS "onnxruntime is downloaded to ${onnxruntime_SOURCE_DIR}")

# for static libraries, we use onnxruntime_lib_files directly below
include_directories(${onnxruntime_SOURCE_DIR}/include)

file(GLOB onnxruntime_lib_files "${onnxruntime_SOURCE_DIR}/lib/lib*.a")

# Fail at configure time with a clear message instead of exporting an empty
# library list that would only surface later as a link error.
if(NOT onnxruntime_lib_files)
  message(FATAL_ERROR "No static libraries (lib*.a) found in ${onnxruntime_SOURCE_DIR}/lib")
endif()

set(onnxruntime_lib_files ${onnxruntime_lib_files} PARENT_SCOPE)

message(STATUS "onnxruntime lib files: ${onnxruntime_lib_files}")
install(FILES ${onnxruntime_lib_files} DESTINATION lib)
@@ -4,8 +4,9 @@ function(download_onnxruntime) @@ -4,8 +4,9 @@ function(download_onnxruntime)
4 4
5 message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}") 5 message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
6 message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") 6 message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
7 -  
8 - if(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) 7 + if(SHERPA_ONNX_ENABLE_WASM)
  8 + include(onnxruntime-wasm-simd)
  9 + elseif(CMAKE_SYSTEM_NAME STREQUAL Linux AND CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
9 if(BUILD_SHARED_LIBS) 10 if(BUILD_SHARED_LIBS)
10 include(onnxruntime-linux-aarch64) 11 include(onnxruntime-linux-aarch64)
11 else() 12 else()
@@ -12,4 +12,3 @@ install(TARGETS sherpa-onnx-c-api DESTINATION lib) @@ -12,4 +12,3 @@ install(TARGETS sherpa-onnx-c-api DESTINATION lib)
12 install(FILES c-api.h 12 install(FILES c-api.h
13 DESTINATION include/sherpa-onnx/c-api 13 DESTINATION include/sherpa-onnx/c-api
14 ) 14 )
15 -  
@@ -11,6 +11,7 @@ @@ -11,6 +11,7 @@
11 11
12 #include "sherpa-onnx/csrc/circular-buffer.h" 12 #include "sherpa-onnx/csrc/circular-buffer.h"
13 #include "sherpa-onnx/csrc/display.h" 13 #include "sherpa-onnx/csrc/display.h"
  14 +#include "sherpa-onnx/csrc/macros.h"
14 #include "sherpa-onnx/csrc/offline-recognizer.h" 15 #include "sherpa-onnx/csrc/offline-recognizer.h"
15 #include "sherpa-onnx/csrc/offline-tts.h" 16 #include "sherpa-onnx/csrc/offline-tts.h"
16 #include "sherpa-onnx/csrc/online-recognizer.h" 17 #include "sherpa-onnx/csrc/online-recognizer.h"
@@ -90,7 +91,7 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( @@ -90,7 +91,7 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
90 SHERPA_ONNX_OR(config->hotwords_score, 1.5); 91 SHERPA_ONNX_OR(config->hotwords_score, 1.5);
91 92
92 if (config->model_config.debug) { 93 if (config->model_config.debug) {
93 - fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); 94 + SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
94 } 95 }
95 96
96 SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; 97 SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer;
@@ -320,7 +321,7 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( @@ -320,7 +321,7 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
320 SHERPA_ONNX_OR(config->hotwords_score, 1.5); 321 SHERPA_ONNX_OR(config->hotwords_score, 1.5);
321 322
322 if (config->model_config.debug) { 323 if (config->model_config.debug) {
323 - fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); 324 + SHERPA_ONNX_LOGE("%s", recognizer_config.ToString().c_str());
324 } 325 }
325 326
326 SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; 327 SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer;
@@ -476,7 +477,7 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( @@ -476,7 +477,7 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
476 vad_config.debug = SHERPA_ONNX_OR(config->debug, false); 477 vad_config.debug = SHERPA_ONNX_OR(config->debug, false);
477 478
478 if (vad_config.debug) { 479 if (vad_config.debug) {
479 - fprintf(stderr, "%s\n", vad_config.ToString().c_str()); 480 + SHERPA_ONNX_LOGE("%s", vad_config.ToString().c_str());
480 } 481 }
481 482
482 SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector; 483 SherpaOnnxVoiceActivityDetector *p = new SherpaOnnxVoiceActivityDetector;
@@ -566,7 +567,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( @@ -566,7 +567,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
566 tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2); 567 tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
567 568
568 if (tts_config.model.debug) { 569 if (tts_config.model.debug) {
569 - fprintf(stderr, "%s\n", tts_config.ToString().c_str()); 570 + SHERPA_ONNX_LOGE("%s\n", tts_config.ToString().c_str());
570 } 571 }
571 572
572 SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts; 573 SherpaOnnxOfflineTts *tts = new SherpaOnnxOfflineTts;
@@ -582,6 +583,10 @@ int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) { @@ -582,6 +583,10 @@ int32_t SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts) {
582 return tts->impl->SampleRate(); 583 return tts->impl->SampleRate();
583 } 584 }
584 585
  586 +int32_t SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts) {
  587 + return tts->impl->NumSpeakers();
  588 +}
  589 +
585 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate( 590 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
586 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, 591 const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
587 float speed) { 592 float speed) {
@@ -658,6 +658,10 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts); @@ -658,6 +658,10 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
658 SHERPA_ONNX_API int32_t 658 SHERPA_ONNX_API int32_t
659 SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts); 659 SherpaOnnxOfflineTtsSampleRate(const SherpaOnnxOfflineTts *tts);
660 660
  661 +// Return the number of speakers of the current TTS object
  662 +SHERPA_ONNX_API int32_t
  663 +SherpaOnnxOfflineTtsNumSpeakers(const SherpaOnnxOfflineTts *tts);
  664 +
661 // Generate audio from the given text and speaker id (sid). 665 // Generate audio from the given text and speaker id (sid).
662 // The user has to use DestroyOfflineTtsGeneratedAudio() to free the 666 // The user has to use DestroyOfflineTtsGeneratedAudio() to free the
663 // returned pointer to avoid memory leak. 667 // returned pointer to avoid memory leak.
@@ -128,9 +128,6 @@ if(APPLE) @@ -128,9 +128,6 @@ if(APPLE)
128 ) 128 )
129 endif() 129 endif()
130 130
131 -if(NOT WIN32)  
132 - target_link_libraries(sherpa-onnx-core -pthread)  
133 -endif()  
134 131
135 if(ANDROID_NDK) 132 if(ANDROID_NDK)
136 target_link_libraries(sherpa-onnx-core android log) 133 target_link_libraries(sherpa-onnx-core android log)
@@ -172,36 +169,42 @@ if(SHERPA_ONNX_ENABLE_CHECK) @@ -172,36 +169,42 @@ if(SHERPA_ONNX_ENABLE_CHECK)
172 endif() 169 endif()
173 170
174 if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux) 171 if(NOT BUILD_SHARED_LIBS AND CMAKE_SYSTEM_NAME STREQUAL Linux)
175 - target_link_libraries(sherpa-onnx-core -pthread -ldl) 172 + target_link_libraries(sherpa-onnx-core -ldl)
176 endif() 173 endif()
177 174
178 -add_executable(sherpa-onnx sherpa-onnx.cc)  
179 -add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc)  
180 -add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc)  
181 -add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc)  
182 -add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc)  
183 -  
184 -set(main_exes  
185 - sherpa-onnx  
186 - sherpa-onnx-keyword-spotter  
187 - sherpa-onnx-offline  
188 - sherpa-onnx-offline-parallel  
189 - sherpa-onnx-offline-tts  
190 -) 175 +if(NOT WIN32 AND NOT SHERPA_ONNX_ENABLE_WASM AND CMAKE_SYSTEM_NAME STREQUAL Linux)
  176 + target_link_libraries(sherpa-onnx-core -pthread)
  177 +endif()
191 178
192 -foreach(exe IN LISTS main_exes)  
193 - target_link_libraries(${exe} sherpa-onnx-core)  
194 -endforeach() 179 +if(SHERPA_ONNX_ENABLE_BINARY)
  180 + add_executable(sherpa-onnx sherpa-onnx.cc)
  181 + add_executable(sherpa-onnx-keyword-spotter sherpa-onnx-keyword-spotter.cc)
  182 + add_executable(sherpa-onnx-offline sherpa-onnx-offline.cc)
  183 + add_executable(sherpa-onnx-offline-parallel sherpa-onnx-offline-parallel.cc)
  184 + add_executable(sherpa-onnx-offline-tts sherpa-onnx-offline-tts.cc)
  185 +
  186 + set(main_exes
  187 + sherpa-onnx
  188 + sherpa-onnx-keyword-spotter
  189 + sherpa-onnx-offline
  190 + sherpa-onnx-offline-parallel
  191 + sherpa-onnx-offline-tts
  192 + )
195 193
196 -if(NOT WIN32)  
197 foreach(exe IN LISTS main_exes) 194 foreach(exe IN LISTS main_exes)
198 - target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")  
199 - target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")  
200 -  
201 - if(SHERPA_ONNX_ENABLE_PYTHON)  
202 - target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")  
203 - endif() 195 + target_link_libraries(${exe} sherpa-onnx-core)
204 endforeach() 196 endforeach()
  197 +
  198 + if(NOT WIN32)
  199 + foreach(exe IN LISTS main_exes)
  200 + target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib")
  201 + target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../../../sherpa_onnx/lib")
  202 +
  203 + if(SHERPA_ONNX_ENABLE_PYTHON)
  204 + target_link_libraries(${exe} "-Wl,-rpath,${SHERPA_ONNX_RPATH_ORIGIN}/../lib/python${PYTHON_VERSION}/site-packages/sherpa_onnx/lib")
  205 + endif()
  206 + endforeach()
  207 + endif()
205 endif() 208 endif()
206 209
207 if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32) 210 if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32)
@@ -214,14 +217,16 @@ if(WIN32 AND BUILD_SHARED_LIBS) @@ -214,14 +217,16 @@ if(WIN32 AND BUILD_SHARED_LIBS)
214 install(TARGETS sherpa-onnx-core DESTINATION bin) 217 install(TARGETS sherpa-onnx-core DESTINATION bin)
215 endif() 218 endif()
216 219
217 -install(  
218 - TARGETS  
219 - ${main_exes}  
220 - DESTINATION  
221 - bin  
222 -) 220 +if(SHERPA_ONNX_ENABLE_BINARY)
  221 + install(
  222 + TARGETS
  223 + ${main_exes}
  224 + DESTINATION
  225 + bin
  226 + )
  227 +endif()
223 228
224 -if(SHERPA_ONNX_HAS_ALSA) 229 +if(SHERPA_ONNX_HAS_ALSA AND SHERPA_ONNX_ENABLE_BINARY)
225 add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc) 230 add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
226 add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc) 231 add_executable(sherpa-onnx-offline-tts-play-alsa sherpa-onnx-offline-tts-play-alsa.cc alsa-play.cc)
227 232
@@ -261,7 +266,7 @@ if(SHERPA_ONNX_HAS_ALSA) @@ -261,7 +266,7 @@ if(SHERPA_ONNX_HAS_ALSA)
261 ) 266 )
262 endif() 267 endif()
263 268
264 -if(SHERPA_ONNX_ENABLE_PORTAUDIO) 269 +if(SHERPA_ONNX_ENABLE_PORTAUDIO AND SHERPA_ONNX_ENABLE_BINARY)
265 add_executable(sherpa-onnx-offline-tts-play 270 add_executable(sherpa-onnx-offline-tts-play
266 sherpa-onnx-offline-tts-play.cc 271 sherpa-onnx-offline-tts-play.cc
267 microphone.cc 272 microphone.cc
@@ -330,7 +335,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO) @@ -330,7 +335,7 @@ if(SHERPA_ONNX_ENABLE_PORTAUDIO)
330 ) 335 )
331 endif() 336 endif()
332 337
333 -if(SHERPA_ONNX_ENABLE_WEBSOCKET) 338 +if(SHERPA_ONNX_ENABLE_WEBSOCKET AND SHERPA_ONNX_ENABLE_BINARY)
334 add_definitions(-DASIO_STANDALONE) 339 add_definitions(-DASIO_STANDALONE)
335 add_definitions(-D_WEBSOCKETPP_CPP11_STL_) 340 add_definitions(-D_WEBSOCKETPP_CPP11_STL_)
336 341
@@ -16,6 +16,14 @@ @@ -16,6 +16,14 @@
16 fprintf(stderr, "\n"); \ 16 fprintf(stderr, "\n"); \
17 __android_log_print(ANDROID_LOG_WARN, "sherpa-onnx", ##__VA_ARGS__); \ 17 __android_log_print(ANDROID_LOG_WARN, "sherpa-onnx", ##__VA_ARGS__); \
18 } while (0) 18 } while (0)
  19 +#elif SHERPA_ONNX_ENABLE_WASM
  20 +#define SHERPA_ONNX_LOGE(...) \
  21 + do { \
  22 + fprintf(stdout, "%s:%s:%d ", __FILE__, __func__, \
  23 + static_cast<int>(__LINE__)); \
  24 + fprintf(stdout, ##__VA_ARGS__); \
  25 + fprintf(stdout, "\n"); \
  26 + } while (0)
19 #else 27 #else
20 #define SHERPA_ONNX_LOGE(...) \ 28 #define SHERPA_ONNX_LOGE(...) \
21 do { \ 29 do { \
# Refuse to configure this directory unless SHERPA_ONNX_IS_USING_BUILD_WASM_SH
# is set to a true value in the environment (presumably exported by the wasm
# build script -- TODO confirm).
#
# The expansion is quoted on purpose: when the variable is unset, an unquoted
# $ENV{...} expands to nothing and CMake sees the malformed `if(NOT )`,
# aborting with a confusing parse error instead of the message below.
if(NOT "$ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH}")
  message(FATAL_ERROR "Please use ./build-wasm-simd.sh to build for wasm")
endif()
  4 +
# Warn early when the model assets that get preloaded into the wasm file system
# are missing. The names match what the JavaScript glue opens at runtime
# (./model.onnx, ./tokens.txt, ./espeak-ng-data). This is only a warning so
# that configuring without a model keeps working.
foreach(sherpa_onnx_wasm_asset IN ITEMS model.onnx tokens.txt espeak-ng-data)
  if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/${sherpa_onnx_wasm_asset}")
    message(WARNING
      "${CMAKE_CURRENT_SOURCE_DIR}/assets/${sherpa_onnx_wasm_asset} does not exist. "
      "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
  endif()
endforeach()
  9 +
# C symbols that must stay callable from JavaScript.
set(exported_functions
  MyPrint
  SherpaOnnxCreateOfflineTts
  SherpaOnnxDestroyOfflineTts
  SherpaOnnxDestroyOfflineTtsGeneratedAudio
  SherpaOnnxOfflineTtsGenerate
  SherpaOnnxOfflineTtsGenerateWithCallback
  SherpaOnnxOfflineTtsNumSpeakers
  SherpaOnnxOfflineTtsSampleRate
  SherpaOnnxWriteWave
)

# Each name gets a leading underscore, then the list is flattened into the
# comma-separated form that is spliced into -sEXPORTED_FUNCTIONS=[...].
list(TRANSFORM exported_functions
  PREPEND "_"
  OUTPUT_VARIABLE mangled_exported_functions
)
list(JOIN mangled_exported_functions "," all_exported_functions)
  26 +
  27 +
include_directories(${CMAKE_SOURCE_DIR})

# Emscripten settings shared by the compile and link steps.
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
# Package the assets directory into the virtual file system root.
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
# lengthBytesUTF8 is listed explicitly because sherpa-onnx.js calls it to size
# every string buffer; without the export it is only available if the
# Emscripten runtime happens to pull it in for its own use.
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8'] ")

# No separate linker-flag variable is set: CMake's link rule for executables
# already includes CMAKE_<LANG>_FLAGS, so the settings above reach emcc at link
# time through the two variables below.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
  38 +
# The install rules in this file copy sherpa-onnx-wasm-main.js, so the
# executable suffix has to be .js.
if(NOT "${CMAKE_EXECUTABLE_SUFFIX}" STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
endif()
# set(CMAKE_EXECUTABLE_SUFFIX ".html")
  43 +
add_executable(sherpa-onnx-wasm-main sherpa-onnx-wasm-main.cc)
target_link_libraries(sherpa-onnx-wasm-main sherpa-onnx-c-api)

# Directory that receives the emcc outputs (.js loader, .wasm module and the
# .data package produced by --preload-file).
set(sherpa_onnx_wasm_out_dir "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main>")

install(TARGETS sherpa-onnx-wasm-main DESTINATION bin/wasm)

# Ship the generated files next to the hand-written demo page so that
# bin/wasm can be served as a static site.
install(
  FILES
    "${sherpa_onnx_wasm_out_dir}/sherpa-onnx-wasm-main.js"
    "index.html"
    "sherpa-onnx.js"
    "app.js"
    "${sherpa_onnx_wasm_out_dir}/sherpa-onnx-wasm-main.wasm"
    "${sherpa_onnx_wasm_out_dir}/sherpa-onnx-wasm-main.data"
  DESTINATION bin/wasm
)
// Handles to the page elements the demo reads from or writes to. The ids
// must match the ones declared in index.html.
const [
  generateBtn,
  hint,
  speakerIdLabel,
  speakerIdInput,
  speedInput,
  speedValue,
  textArea,
  soundClips,
] =
    [
      'generateBtn',
      'hint',
      'speakerIdLabel',
      'speakerId',
      'speed',
      'speedValue',
      'text',
      'sound-clips',
    ].map((id) => document.getElementById(id));

// Show the slider's initial value next to it.
speedValue.innerHTML = speedInput.value;

// Running counter used to number the generated clips.
let index = 0;

// OfflineTts instance; assigned once the wasm runtime is initialized.
let tts = null;

// AudioContext used for immediate playback; created on first use.
let audioCtx = null;
  18 +
  19 +
// Emscripten picks up this global object when sherpa-onnx-wasm-main.js is
// loaded, so it has to be defined before that script runs.
Module = {
  // Called once the wasm runtime is ready: create the tts object, update the
  // page and unlock the Generate button.
  onRuntimeInitialized() {
    console.log('Model files downloaded!');

    console.log('Initializing tts ......');
    tts = initSherpaOnnxOfflineTts();

    const lastSpeakerId = tts.numSpeakers - 1;
    if (tts.numSpeakers > 1) {
      speakerIdLabel.innerHTML = `Speaker ID (0 - ${lastSpeakerId}):`;
    }

    hint.innerText =
        'Initialized! Please enter text and click the Generate button.';

    generateBtn.disabled = false;
  },
};
  37 +
// Mirror the slider position into the label next to it while it is dragged.
speedInput.oninput = () => {
  speedValue.innerHTML = speedInput.value;
};
  41 +
// Validate the form, synthesize the text, play the result immediately and add
// a replayable clip to the page.
generateBtn.onclick = function() {
  let speakerId = speakerIdInput.value;
  if (speakerId.trim().length == 0) {
    alert('Please input a speakerId');
    return;
  }

  const maxSpeakerId = tts.numSpeakers - 1;

  if (!speakerId.match(/^\d+$/)) {
    alert(`Input speakerID ${
        speakerId} is not a number.\nPlease enter a number between 0 and ${
        maxSpeakerId}`);
    return;
  }
  speakerId = parseInt(speakerId, 10);
  if (speakerId > maxSpeakerId) {
    alert(`Please enter a number between 0 and ${maxSpeakerId}`);
    return;
  }

  const text = textArea.value.trim();
  if (text.length == 0) {
    alert('Please input a non-blank text');
    return;
  }

  // The slider value is a string; convert it once so that the wasm call
  // receives a real number instead of relying on implicit coercion.
  const speed = parseFloat(speedInput.value);

  console.log('speakerId', speakerId);
  console.log('speed', speed);
  console.log('text', text);

  const audio = tts.generate({text: text, sid: speakerId, speed: speed});

  console.log(audio.samples.length, audio.sampleRate);

  // AudioContext.createBuffer() throws for a length of 0, so bail out with a
  // message instead of an uncaught exception when nothing was synthesized.
  if (audio.samples.length == 0) {
    alert('No audio was generated for the given text');
    return;
  }

  if (!audioCtx) {
    audioCtx = new AudioContext({sampleRate: tts.sampleRate});
  }

  const buffer = audioCtx.createBuffer(1, audio.samples.length, tts.sampleRate);
  buffer.getChannelData(0).set(audio.samples);

  const source = audioCtx.createBufferSource();
  source.buffer = buffer;
  source.connect(audioCtx.destination);
  source.start();

  createAudioTag(audio);
};
  93 +
// Append a playable, deletable and renamable clip for the generated audio to
// the #sound-clips section.
//
// generateAudio: {samples: Float32Array, sampleRate: number}, as returned by
// OfflineTts.generate().
function createAudioTag(generateAudio) {
  const blob = toWav(generateAudio.samples, generateAudio.sampleRate);

  // Label the clip with its running number and the first 100 characters of
  // the text currently in the text area.
  const text = textArea.value.trim().substring(0, 100);
  const clipName = `${index} ${text} ...`;
  index += 1;

  const clipContainer = document.createElement('article');
  const clipLabel = document.createElement('p');
  const audio = document.createElement('audio');
  const deleteButton = document.createElement('button');

  clipContainer.classList.add('clip');
  audio.controls = true;
  deleteButton.textContent = 'Delete';
  deleteButton.className = 'delete';
  clipLabel.textContent = clipName;

  clipContainer.appendChild(audio);
  clipContainer.appendChild(clipLabel);
  clipContainer.appendChild(deleteButton);
  soundClips.appendChild(clipContainer);

  const audioURL = window.URL.createObjectURL(blob);
  audio.src = audioURL;

  deleteButton.onclick = function() {
    clipContainer.remove();
    // Release the blob behind the object URL; otherwise every generated wave
    // stays in memory until the page is closed.
    window.URL.revokeObjectURL(audioURL);
  };

  clipLabel.onclick = function() {
    const newClipName = prompt('Enter a new name for your sound clip?');
    // prompt() returns null when the dialog is cancelled; keep the old name.
    if (newClipName !== null) {
      clipLabel.textContent = newClipName;
    }
  };
}
  138 +
// this function is copied/modified from
// https://gist.github.com/meziantou/edb7217fddfbb70e899e
//
// Encode mono float samples as a 16-bit PCM WAVE file.
//
// floatSamples: array-like of numbers; values outside [-1, 1] are clamped.
// sampleRate:   sample rate in Hz written into the header.
// Returns a Blob of type 'audio/wav' (44-byte header followed by the samples).
function toWav(floatSamples, sampleRate) {
  const headerSize = 44;
  const bytesPerSample = 2;
  const numSamples = floatSamples.length;
  const dataSize = numSamples * bytesPerSample;

  const view = new DataView(new ArrayBuffer(headerSize + dataSize));

  // http://soundfile.sapp.org/doc/WaveFormat/
  // All fields are little endian; the four-character tags are therefore
  // written as byte-reversed 32-bit constants.
  view.setUint32(0, 0x46464952, true);              // chunkID, "RIFF"
  view.setUint32(4, 36 + dataSize, true);           // chunkSize
  view.setUint32(8, 0x45564157, true);              // format, "WAVE"
  view.setUint32(12, 0x20746d66, true);             // subchunk1ID, "fmt "
  view.setUint32(16, 16, true);                     // subchunk1Size, 16 for PCM
  view.setUint16(20, 1, true);                      // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                      // numChannels: 1 channel
  view.setUint32(24, sampleRate, true);             // sampleRate
  view.setUint32(28, sampleRate * bytesPerSample, true);  // byteRate
  view.setUint16(32, bytesPerSample, true);         // blockAlign
  view.setUint16(34, 16, true);                     // bitsPerSample
  view.setUint32(36, 0x61746164, true);             // subchunk2ID, "data"
  view.setUint32(40, dataSize, true);               // subchunk2Size

  for (let i = 0; i < numSamples; ++i) {
    // Clamp to [-1, 1] and scale to the int16 range.
    const s = Math.max(-1, Math.min(1, floatSamples[i]));
    view.setInt16(headerSize + i * bytesPerSample, s * 32767, true);
  }

  return new Blob([view], {type: 'audio/wav'});
}
  1 +*.onnx
  2 +*.txt
  3 +espeak-ng-data
  4 +
  1 +# Introduction
  2 +
  3 +Please refer to
  4 +https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  5 +to download a model.
  6 +
  7 +The following is an example:
  8 +```
  9 +cd sherpa-onnx/wasm/assets
  10 +
  11 +wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  12 +tar xf vits-piper-en_US-libritts_r-medium.tar.bz2
  13 +rm vits-piper-en_US-libritts_r-medium.tar.bz2
  14 +mv vits-piper-en_US-libritts_r-medium/en_US-libritts_r-medium.onnx ./model.onnx
  15 +mv vits-piper-en_US-libritts_r-medium/tokens.txt ./
  16 +mv vits-piper-en_US-libritts_r-medium/espeak-ng-data ./
  17 +rm -rf vits-piper-en_US-libritts_r-medium
  18 +```
  19 +
  20 +You should have the following files in `assets` before you can run
  21 +`build-wasm-simd.sh`
  22 +
  23 +```
  24 +assets fangjun$ tree -L 1
  25 +.
  26 +├── README.md
  27 +├── espeak-ng-data
  28 +├── model.onnx
  29 +└── tokens.txt
  30 +
  31 +1 directory, 3 files
  32 +```
<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
  </style>
</head>

<body>
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    Text-to-speech Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a>
  </h1>
  <div>
    <!-- app.js replaces this text once the wasm runtime is initialized -->
    <span id="hint">Loading model ... ...</span>
    <br/>
    <br/>
    <label for="speakerId" id="speakerIdLabel">Speaker ID: </label>
    <input type="text" id="speakerId" name="speakerId" value="0" />
    <br/>
    <br/>
    <label for="speed" id="speedLabel">Speed: </label>
    <input type="range" id="speed" name="speed" min="0.4" max="3.5" step="0.1" value="1.0" />
    <span id="speedValue"></span>
    <br/>
    <br/>
    <textarea id="text" rows="10" placeholder="Please enter your text here and click the Generate button"></textarea>
    <br/>
    <br/>
    <!-- stays disabled until app.js enables it after initialization -->
    <button id="generateBtn" disabled>Generate</button>
  </div>
  <!-- generated clips are appended here by app.js -->
  <section flex="1" overflow="auto" id="sound-clips">
  </section>

  <!--
    Order matters: app.js defines the global Module object (including
    onRuntimeInitialized) that sherpa-onnx-wasm-main.js picks up when it
    loads, so the generated loader has to come last.
  -->
  <script src="app.js"></script>
  <script src="sherpa-onnx.js"></script>
  <script src="sherpa-onnx-wasm-main.js"></script>
</body>

</html>
  1 +// wasm/sherpa-onnx-wasm-main.cc
  2 +//
  3 +// Copyright (c) 2024 Xiaomi Corporation
  4 +#include <stdio.h>
  5 +
  6 +#include <algorithm>
  7 +#include <memory>
  8 +
  9 +#include "sherpa-onnx/c-api/c-api.h"
  10 +
  11 +// see also
  12 +// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
  13 +
  14 +extern "C" {
  15 +
  16 +static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 7 * 4, "");
  17 +static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
  18 + sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4,
  19 + "");
  20 +static_assert(sizeof(SherpaOnnxOfflineTtsConfig) ==
  21 + sizeof(SherpaOnnxOfflineTtsModelConfig) + 2 * 4,
  22 + "");
  23 +
  24 +void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
  25 + auto tts_model_config = &tts_config->model;
  26 + auto vits_model_config = &tts_model_config->vits;
  27 + fprintf(stdout, "----------vits model config----------\n");
  28 + fprintf(stdout, "model: %s\n", vits_model_config->model);
  29 + fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
  30 + fprintf(stdout, "tokens: %s\n", vits_model_config->tokens);
  31 + fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir);
  32 + fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
  33 + fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
  34 + fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
  35 +
  36 + fprintf(stdout, "----------tts model config----------\n");
  37 + fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
  38 + fprintf(stdout, "debug: %d\n", tts_model_config->debug);
  39 + fprintf(stdout, "provider: %s\n", tts_model_config->provider);
  40 +
  41 + fprintf(stdout, "----------tts config----------\n");
  42 + fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
  43 + fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
  44 +}
  45 +
// Copy num_bytes bytes from src to dst, front to back. Exported so that the
// JavaScript glue can embed one config struct inside another.
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  std::copy_n(src, num_bytes, dst);
}
  49 +}
  1 +
// Release everything a config wrapper owns on the wasm heap: its string
// buffer (if any), then its nested wrapper (if any), then the struct itself.
function freeConfig(config) {
  const ownsBuffer = 'buffer' in config;
  const hasNested = 'config' in config;

  if (ownsBuffer) {
    _free(config.buffer);
  }

  if (hasNested) {
    freeConfig(config.config);
  }

  _free(config.ptr);
}
  13 +
// Build a SherpaOnnxOfflineTtsVitsModelConfig struct on the wasm heap.
//
// The four strings are stored back to back in a single buffer; the struct
// holds four 32-bit pointers into that buffer followed by three floats.
//
// The user should free the returned pointers
function initSherpaOnnxOfflineTtsVitsModelConfig(config) {
  const strings = [config.model, config.lexicon, config.tokens, config.dataDir];
  // +1 for the terminating NUL of each string
  const sizes = strings.map((s) => lengthBytesUTF8(s) + 1);

  const buffer = _malloc(sizes.reduce((total, size) => total + size, 0));

  const len = 7 * 4;
  const ptr = _malloc(len);

  // Pass 1: write the strings and remember where each one starts.
  const addresses = [];
  let cursor = buffer;
  strings.forEach((s, i) => {
    stringToUTF8(s, cursor, sizes[i]);
    addresses.push(cursor);
    cursor += sizes[i];
  });

  // Pass 2: fill in the struct fields.
  addresses.forEach((address, i) => {
    setValue(ptr + 4 * i, address, 'i8*');
  });

  setValue(ptr + 16, config.noiseScale, 'float');
  setValue(ptr + 20, config.noiseScaleW, 'float');
  setValue(ptr + 24, config.lengthScale, 'float');

  return {buffer: buffer, ptr: ptr, len: len};
}
  62 +
// Build a SherpaOnnxOfflineTtsModelConfig struct on the wasm heap: an
// embedded copy of the vits config followed by numThreads (i32), debug (i32)
// and a pointer to the provider string.
//
// The returned wrapper keeps the vits wrapper in `config` so that
// freeConfig() can release it as well.
function initSherpaOnnxOfflineTtsModelConfig(config) {
  const vits =
      initSherpaOnnxOfflineTtsVitsModelConfig(config.offlineTtsVitsModelConfig);

  const len = vits.len + 3 * 4;
  const ptr = _malloc(len);

  // Field addresses, in struct order.
  const numThreadsAddr = ptr + vits.len;
  const debugAddr = numThreadsAddr + 4;
  const providerAddr = debugAddr + 4;

  _CopyHeap(vits.ptr, vits.len, ptr);
  setValue(numThreadsAddr, config.numThreads, 'i32');
  setValue(debugAddr, config.debug, 'i32');

  const providerLen = lengthBytesUTF8(config.provider) + 1;
  const buffer = _malloc(providerLen);
  stringToUTF8(config.provider, buffer, providerLen);
  setValue(providerAddr, buffer, 'i8*');

  return {buffer: buffer, ptr: ptr, len: len, config: vits};
}
  89 +
// Build a SherpaOnnxOfflineTtsConfig struct on the wasm heap: an embedded
// copy of the model config followed by a pointer to the ruleFsts string and
// maxNumSentences (i32).
//
// The returned wrapper keeps the model wrapper in `config` so that
// freeConfig() can release it as well.
function initSherpaOnnxOfflineTtsConfig(config) {
  const model =
      initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig);

  const len = model.len + 2 * 4;
  const ptr = _malloc(len);

  // Field addresses, in struct order.
  const ruleFstsAddr = ptr + model.len;
  const maxNumSentencesAddr = ruleFstsAddr + 4;

  _CopyHeap(model.ptr, model.len, ptr);

  const ruleFstsLen = lengthBytesUTF8(config.ruleFsts) + 1;
  const buffer = _malloc(ruleFstsLen);
  stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
  setValue(ruleFstsAddr, buffer, 'i8*');

  setValue(maxNumSentencesAddr, config.maxNumSentences, 'i32');

  return {buffer: buffer, ptr: ptr, len: len, config: model};
}
  112 +
// JavaScript wrapper around the offline tts object of the C API.
//
// Call free() when the object is no longer needed; the wasm heap is not
// garbage collected.
class OfflineTts {
  // configObj: nested config object in the shape accepted by
  // initSherpaOnnxOfflineTtsConfig().
  constructor(configObj) {
    const config = initSherpaOnnxOfflineTtsConfig(configObj);
    const handle = _SherpaOnnxCreateOfflineTts(config.ptr);

    // The heap copy of the config is released right after creation.
    freeConfig(config);

    this.handle = handle;
    this.sampleRate = _SherpaOnnxOfflineTtsSampleRate(this.handle);
    this.numSpeakers = _SherpaOnnxOfflineTtsNumSpeakers(this.handle);
  }

  // Destroy the underlying tts object.
  free() {
    _SherpaOnnxDestroyOfflineTts(this.handle);
    this.handle = 0;
  }

  // Synthesize speech for the given text.
  //
  // config:
  // {
  //   text: "hello",
  //   sid: 1,
  //   speed: 1.0
  // }
  //
  // Returns {samples: Float32Array, sampleRate: number}. The samples are
  // copied out of the wasm heap, so the result stays valid after the call.
  generate(config) {
    const textLen = lengthBytesUTF8(config.text) + 1;
    const textPtr = _malloc(textLen);
    stringToUTF8(config.text, textPtr, textLen);

    let h;
    try {
      h = _SherpaOnnxOfflineTtsGenerate(
          this.handle, textPtr, config.sid, config.speed);
    } finally {
      // The text copy is only needed for the duration of the call; without
      // this every generate() leaks textLen bytes of wasm heap.
      _free(textPtr);
    }

    // The returned struct is read as three consecutive 32-bit fields:
    // pointer to the float samples, number of samples, sample rate.
    const base = h / 4;
    const samplesStart = HEAP32[base] / 4;
    const numSamples = HEAP32[base + 1];
    const sampleRate = HEAP32[base + 2];

    // slice() copies, so the data survives the destroy call below.
    const samples = HEAPF32.slice(samplesStart, samplesStart + numSamples);

    _SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
    return {samples: samples, sampleRate: sampleRate};
  }
}
  156 +
// Create an OfflineTts using the model files that were preloaded into the
// root of the wasm file system.
function initSherpaOnnxOfflineTts() {
  return new OfflineTts({
    offlineTtsModelConfig: {
      offlineTtsVitsModelConfig: {
        model: './model.onnx',
        lexicon: '',
        tokens: './tokens.txt',
        dataDir: './espeak-ng-data',
        noiseScale: 0.667,
        noiseScaleW: 0.8,
        lengthScale: 1.0,
      },
      numThreads: 1,
      debug: 1,
      provider: 'cpu',
    },
    ruleFsts: '',
    maxNumSentences: 1,
  });
}