Fangjun Kuang
Committed by GitHub

Support piper-phonemize (#452)

... ... @@ -19,10 +19,10 @@ log "------------------------------------------------------------"
wenet_models=(
sherpa-onnx-zh-wenet-aishell
sherpa-onnx-zh-wenet-aishell2
sherpa-onnx-zh-wenet-wenetspeech
# sherpa-onnx-zh-wenet-wenetspeech
sherpa-onnx-zh-wenet-multi-cn
sherpa-onnx-en-wenet-librispeech
sherpa-onnx-en-wenet-gigaspeech
# sherpa-onnx-en-wenet-gigaspeech
)
for name in ${wenet_models[@]}; do
repo_url=https://huggingface.co/csukuangfj/$name
... ...
... ... @@ -168,6 +168,9 @@ jobs:
lib_type=${{ matrix.lib_type }}
if [[ $lib_type == "shared" ]]; then
cp -a build-arm-linux-gnueabihf/install/lib $dst/
rm -v $dst/lib/libasound.so
rm -v $dst/lib/libonnxruntime.so
rm -v $dst/lib/libsherpa-onnx-fst.so
fi
tree $dst
... ...
# Workflow: configure and build the project with tests enabled on
# Linux, macOS and Windows, then run the piper-phonemize-test binary
# from the build directory.
name: test-piper-phonemize

on:
  push:
    branches:
      - master
  workflow_dispatch:

# Only one run per ref at a time; a newer run cancels the one in progress.
concurrency:
  group: test-piper-phonemize-${{ github.ref }}
  cancel-in-progress: true

jobs:
  test_piper_phonemize:
    name: ${{ matrix.os }} ${{ matrix.build_type }} ${{ matrix.shared_lib }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ubuntu-latest, macos-latest, windows-latest]
        build_type: [Release, Debug]
        shared_lib: [ON, OFF]
        # The static Debug build on Windows is not part of the matrix.
        exclude:
          - os: windows-latest
            build_type: Debug
            shared_lib: OFF

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # One compiler cache per (os, build type, shared/static) combination.
      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-${{ matrix.build_type }}-shared-${{ matrix.shared_lib }}

      - name: Configure CMake
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          mkdir build
          cd build
          cmake -DCMAKE_VERBOSE_MAKEFILE=ON -D SHERPA_ONNX_ENABLE_TESTS=ON -D CMAKE_BUILD_TYPE=${{ matrix.build_type }} -D BUILD_SHARED_LIBS=${{ matrix.shared_lib }} -DCMAKE_INSTALL_PREFIX=./install ..

      - name: Build
        shell: bash
        run: |
          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          cd build
          cmake --build . --target install --config ${{ matrix.build_type }}

      # The test binary is started from build/, where the install tree
      # (including install/share/espeak-ng-data) has just been listed.
      - name: Run test (Linux and macOS)
        if: matrix.os != 'windows-latest'
        shell: bash
        run: |
          cd build
          ls -lh install/
          ls -lh install/share
          ls -lh install/share/espeak-ng-data/
          ./bin/piper-phonemize-test

      # On Windows the binary is looked up in a per-configuration
      # sub-directory of bin/.
      - name: Run test (Windows)
        if: matrix.os == 'windows-latest'
        shell: bash
        run: |
          cd build
          ls -lh install/
          ls -lh install/share
          ls -lh install/share/espeak-ng-data/
          ./bin/${{ matrix.build_type }}/piper-phonemize-test
... ...
... ... @@ -159,6 +159,8 @@ endif()
include(kaldi-native-fbank)
include(kaldi-decoder)
include(onnxruntime)
set(ONNXRUNTIME_DIR ${onnxruntime_SOURCE_DIR})
message(STATUS "ONNXRUNTIME_DIR: ${ONNXRUNTIME_DIR}")
if(SHERPA_ONNX_ENABLE_PORTAUDIO)
include(portaudio)
... ... @@ -178,6 +180,11 @@ if(SHERPA_ONNX_ENABLE_WEBSOCKET)
include(asio)
endif()
include(espeak-ng-for-piper)
set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
include(piper-phonemize)
add_subdirectory(sherpa-onnx)
if(SHERPA_ONNX_ENABLE_C_API)
... ...
... ... @@ -45,6 +45,10 @@ if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then
fi
cmake \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
... ...
... ... @@ -72,6 +72,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
... ...
... ... @@ -73,6 +73,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
... ...
... ... @@ -73,6 +73,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
... ...
... ... @@ -73,6 +73,10 @@ echo "SHERPA_ONNXRUNTIME_LIB_DIR: $SHERPA_ONNXRUNTIME_LIB_DIR"
echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
cmake -DCMAKE_TOOLCHAIN_FILE="$ANDROID_NDK/build/cmake/android.toolchain.cmake" \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=ON \
-DSHERPA_ONNX_ENABLE_PYTHON=OFF \
... ...
... ... @@ -40,6 +40,10 @@ if [[ x"$BUILD_SHARED_LIBS" == x"" ]]; then
fi
cmake \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
... ...
... ... @@ -51,6 +51,10 @@ echo "SHERPA_ONNXRUNTIME_INCLUDE_DIR $SHERPA_ONNXRUNTIME_INCLUDE_DIR"
#
cmake \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-S .. \
-DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \
-DPLATFORM=SIMULATOR64 \
... ... @@ -74,6 +78,10 @@ cmake --build build/simulator_x86_64 -j 4 --verbose
echo "Building for simulator (arm64)"
cmake \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-S .. \
-DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \
-DPLATFORM=SIMULATORARM64 \
... ... @@ -101,6 +109,10 @@ export SHERPA_ONNXRUNTIME_LIB_DIR=$PWD/ios-onnxruntime/onnxruntime.xcframework/i
cmake \
-DBUILD_PIPER_PHONMIZE_EXE=OFF \
-DBUILD_PIPER_PHONMIZE_TESTS=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DBUILD_ESPEAK_NG_TESTS=OFF \
-S .. \
-DCMAKE_TOOLCHAIN_FILE=./toolchains/ios.toolchain.cmake \
-DPLATFORM=OS64 \
... ...
# Fetch the espeak-ng sources, add them to the build as a subdirectory,
# silence selected compiler warnings for its targets, and register
# install rules for the resulting libraries.
#
# Takes no arguments. On return, espeak_ng_SOURCE_DIR is set in the
# caller's scope and points at the unpacked espeak-ng source tree.
function(download_espeak_ng_for_piper)
  include(FetchContent)

  set(espeak_ng_URL "https://github.com/csukuangfj/espeak-ng/archive/c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip")
  set(espeak_ng_URL2 "")
  set(espeak_ng_HASH "SHA256=8a48251e6926133dd91fcf6cb210c7c2e290a9b578d269446e2d32d710b0dfa0")

  # Cache options forced before espeak-ng is added as a subdirectory
  # (presumably read by espeak-ng's own CMakeLists.txt -- confirm there).
  set(USE_ASYNC OFF CACHE BOOL "" FORCE)
  set(USE_MBROLA OFF CACHE BOOL "" FORCE)
  set(USE_LIBSONIC OFF CACHE BOOL "" FORCE)
  set(USE_LIBPCAUDIO OFF CACHE BOOL "" FORCE)
  set(USE_KLATT OFF CACHE BOOL "" FORCE)
  set(USE_SPEECHPLAYER OFF CACHE BOOL "" FORCE)
  set(EXTRA_cmn ON CACHE BOOL "" FORCE)
  set(EXTRA_ru ON CACHE BOOL "" FORCE)

  # If you don't have access to the Internet,
  # please pre-download espeak-ng to one of the locations below.
  set(possible_file_locations
    $ENV{HOME}/Downloads/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip
    ${PROJECT_SOURCE_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip
    ${PROJECT_BINARY_DIR}/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip
    /tmp/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip
    /star-fj/fangjun/download/github/espeak-ng-c58d2a4a88e9a291ca448f046e15c6188cbd3b3a.zip
  )

  # The first existing local archive replaces the remote URL.
  foreach(f IN LISTS possible_file_locations)
    if(EXISTS "${f}")
      set(espeak_ng_URL "${f}")
      file(TO_CMAKE_PATH "${espeak_ng_URL}" espeak_ng_URL)
      message(STATUS "Found local downloaded espeak-ng: ${espeak_ng_URL}")
      set(espeak_ng_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(espeak_ng
    URL
      ${espeak_ng_URL}
      ${espeak_ng_URL2}
    URL_HASH ${espeak_ng_HASH}
  )

  FetchContent_GetProperties(espeak_ng)
  if(NOT espeak_ng_POPULATED)
    message(STATUS "Downloading espeak-ng from ${espeak_ng_URL}")
    FetchContent_Populate(espeak_ng)
  endif()
  message(STATUS "espeak-ng is downloaded to ${espeak_ng_SOURCE_DIR}")
  message(STATUS "espeak-ng binary dir is ${espeak_ng_BINARY_DIR}")

  add_subdirectory(${espeak_ng_SOURCE_DIR} ${espeak_ng_BINARY_DIR})

  # Export the source location so the including file can read it.
  set(espeak_ng_SOURCE_DIR ${espeak_ng_SOURCE_DIR} PARENT_SCOPE)

  # Warning suppressions for the third-party targets under MSVC.
  if(WIN32 AND MSVC)
    target_compile_options(ucd PUBLIC
      /wd4309
    )

    target_compile_options(espeak-ng PUBLIC
      /wd4005
      /wd4018
      /wd4067
      /wd4068
      /wd4090
      /wd4101
      /wd4244
      /wd4267
      /wd4996
    )

    # The executable target only exists when espeak-ng builds it.
    if(TARGET espeak-ng-bin)
      target_compile_options(espeak-ng-bin PRIVATE
        /wd4244
        /wd4024
        /wd4047
        /wd4067
        /wd4267
        /wd4996
      )
    endif()
  endif()

  # Warning suppressions for non-Apple Unix platforms.
  if(UNIX AND NOT APPLE)
    target_compile_options(espeak-ng PRIVATE
      -Wno-unused-result
      -Wno-format-overflow
      -Wno-format-truncation
      -Wno-maybe-uninitialized
      -Wno-format
    )

    if(TARGET espeak-ng-bin)
      target_compile_options(espeak-ng-bin PRIVATE
        -Wno-unused-result
      )
    endif()
  endif()

  # Targets that link against espeak-ng inherit these header directories.
  target_include_directories(espeak-ng
    INTERFACE
      ${espeak_ng_SOURCE_DIR}/src/include
      ${espeak_ng_SOURCE_DIR}/src/ucd-tools/src/include
  )

  if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32)
    install(TARGETS
      espeak-ng
    DESTINATION ..)
  else()
    install(TARGETS
      espeak-ng
    DESTINATION lib)
  endif()

  # ucd is installed separately only for static builds.
  if(NOT BUILD_SHARED_LIBS)
    install(TARGETS ucd DESTINATION lib)
  endif()

  # For shared builds on Windows the library is additionally installed
  # into bin.
  if(WIN32 AND BUILD_SHARED_LIBS)
    install(TARGETS
      espeak-ng
    DESTINATION bin)
  endif()
endfunction()

download_espeak_ng_for_piper()
... ...
... ... @@ -47,6 +47,13 @@ function(download_kaldi_decoder)
include_directories(${kaldi_decoder_SOURCE_DIR})
add_subdirectory(${kaldi_decoder_SOURCE_DIR} ${kaldi_decoder_BINARY_DIR} EXCLUDE_FROM_ALL)
if(WIN32 AND MSVC)
target_compile_options(kaldi-decoder-core PUBLIC
/wd4018
/wd4291
)
endif()
target_include_directories(kaldi-decoder-core
INTERFACE
${kaldi-decoder_SOURCE_DIR}/
... ...
... ... @@ -94,6 +94,7 @@ function(download_onnxruntime)
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(FATAL_ERROR "Only support Linux, macOS, and Windows at present. Will support other OSes later")
endif()
set(onnxruntime_SOURCE_DIR ${onnxruntime_SOURCE_DIR} PARENT_SCOPE)
endfunction()
# First, we try to locate the header and the lib if the user has already
... ...
# Fetch the piper-phonemize sources, add them to the build as a
# subdirectory (excluded from the default "all" target), expose its
# headers to dependents, and register install rules for the library.
#
# Takes no arguments and sets nothing in the caller's scope.
function(download_piper_phonemize)
  include(FetchContent)

  set(piper_phonemize_URL "https://github.com/csukuangfj/piper-phonemize/archive/6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip")
  set(piper_phonemize_URL2 "")
  set(piper_phonemize_HASH "SHA256=6fbacf540b03f00d1386bb372fb7090e3bb852bd019d74e615d3f161f728bc93")

  # If you don't have access to the Internet,
  # please pre-download piper-phonemize to one of the locations below.
  set(possible_file_locations
    $ENV{HOME}/Downloads/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip
    ${PROJECT_SOURCE_DIR}/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip
    ${PROJECT_BINARY_DIR}/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip
    /tmp/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip
    /star-fj/fangjun/download/github/piper-phonemize-6383e46b62e94c5cafc0c6a6212249ed8b9ed8d0.zip
  )

  # The first existing local archive replaces the remote URL.
  foreach(f IN LISTS possible_file_locations)
    if(EXISTS "${f}")
      set(piper_phonemize_URL "${f}")
      file(TO_CMAKE_PATH "${piper_phonemize_URL}" piper_phonemize_URL)
      message(STATUS "Found local downloaded piper-phonemize: ${piper_phonemize_URL}")
      set(piper_phonemize_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(piper_phonemize
    URL
      ${piper_phonemize_URL}
      ${piper_phonemize_URL2}
    URL_HASH ${piper_phonemize_HASH}
  )

  FetchContent_GetProperties(piper_phonemize)
  if(NOT piper_phonemize_POPULATED)
    message(STATUS "Downloading piper-phonemize from ${piper_phonemize_URL}")
    FetchContent_Populate(piper_phonemize)
  endif()
  message(STATUS "piper-phonemize is downloaded to ${piper_phonemize_SOURCE_DIR}")
  message(STATUS "piper-phonemize binary dir is ${piper_phonemize_BINARY_DIR}")

  add_subdirectory(${piper_phonemize_SOURCE_DIR} ${piper_phonemize_BINARY_DIR} EXCLUDE_FROM_ALL)

  # Warning suppression for the third-party target under MSVC.
  if(WIN32 AND MSVC)
    target_compile_options(piper_phonemize PUBLIC
      /wd4309
    )
  endif()

  # Targets that link against piper_phonemize inherit this header directory.
  target_include_directories(piper_phonemize
    INTERFACE
      ${piper_phonemize_SOURCE_DIR}/src/include
  )

  if(SHERPA_ONNX_ENABLE_PYTHON AND WIN32)
    install(TARGETS
      piper_phonemize
    DESTINATION ..)
  else()
    install(TARGETS
      piper_phonemize
    DESTINATION lib)
  endif()

  # For shared builds on Windows the library is additionally installed
  # into bin.
  if(WIN32 AND BUILD_SHARED_LIBS)
    install(TARGETS
      piper_phonemize
    DESTINATION bin)
  endif()
endfunction()

download_piper_phonemize()
... ...
... ... @@ -106,6 +106,11 @@ if(SHERPA_ONNX_ENABLE_CHECK)
list(APPEND sources log.cc)
endif()
add_library(sherpa-onnx-core ${sources})
if(APPLE)
target_compile_options(sherpa-onnx-core PRIVATE
-Wno-deprecated-declarations
)
endif()
if(NOT WIN32)
target_link_libraries(sherpa-onnx-core -pthread)
... ... @@ -136,6 +141,8 @@ if(SHERPA_ONNX_ENABLE_GPU)
)
endif()
target_link_libraries(sherpa-onnx-core piper_phonemize)
if(SHERPA_ONNX_ENABLE_CHECK)
target_compile_definitions(sherpa-onnx-core PUBLIC SHERPA_ONNX_ENABLE_CHECK=1)
... ... @@ -343,6 +350,7 @@ if(SHERPA_ONNX_ENABLE_TESTS)
context-graph-test.cc
packed-sequence-test.cc
pad-sequence-test.cc
piper-phonemize-test.cc
slice-test.cc
stack-test.cc
transpose-test.cc
... ...
// sherpa-onnx/csrc/piper-phonemize-test.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <vector>

#include "espeak-ng/speak_lib.h"
#include "gtest/gtest.h"
#include "phoneme_ids.hpp"
#include "phonemize.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
namespace sherpa_onnx {

// Smoke test for the piper-phonemize integration.
//
// Initializes espeak-ng from ./install/share/espeak-ng-data, phonemizes one
// English sentence, converts the phonemes of the first sentence to IDs, and
// prints both sequences. The test returns early (without failing) when any
// of the required espeak-ng data files is absent.
TEST(PiperPhonemize, Case1) {
  std::string data_dir = "./install/share/espeak-ng-data";

  // Data files that must be present before espeak-ng is initialized.
  const char *required_files[] = {"en_dict", "phontab", "phonindex",
                                  "phondata", "intonations"};
  for (const char *name : required_files) {
    if (!FileExists(data_dir + "/" + name)) {
      SHERPA_ONNX_LOGE("%s/%s does not exist. Skipping test",
                       data_dir.c_str(), name);
      return;
    }
  }

  int32_t result =
      espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 0, data_dir.c_str(), 0);
  // 22050 is presumably the sample rate reported on success -- confirm
  // against speak_lib.h. Abort here on mismatch: phonemizing with an
  // uninitialized engine cannot produce meaningful output.
  ASSERT_EQ(result, 22050);

  piper::eSpeakPhonemeConfig config;

  // ./bin/espeak-ng --path ./install/share/espeak-ng-data/ --voices
  // to list available voices
  config.voice = "en-us";

  std::vector<std::vector<piper::Phoneme>> phonemes;
  std::string text = "how are you doing?";
  piper::phonemize_eSpeak(text, config, phonemes);

  // Guard the phonemes[0] accesses below against an empty result.
  ASSERT_FALSE(phonemes.empty());

  for (int32_t p : phonemes[0]) {
    std::cout << p << " ";
  }
  std::cout << "\n";

  std::vector<piper::PhonemeId> phonemeIds;
  std::map<piper::Phoneme, std::size_t> missingPhonemes;

  {
    // Default-constructed ID configuration; named differently from the
    // eSpeak config above to avoid shadowing it.
    piper::PhonemeIdConfig id_config;
    phonemes_to_ids(phonemes[0], id_config, phonemeIds, missingPhonemes);
  }

  for (int32_t p : phonemeIds) {
    std::cout << p << " ";
  }
  std::cout << "\n";
}

}  // namespace sherpa_onnx
... ...