Fangjun Kuang
Committed by GitHub

Support text normalization via rule FST (#407)

@@ -100,8 +100,11 @@ message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}") @@ -100,8 +100,11 @@ message(STATUS "SHERPA_ONNX_ENABLE_C_API ${SHERPA_ONNX_ENABLE_C_API}")
100 message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}") 100 message(STATUS "SHERPA_ONNX_ENABLE_WEBSOCKET ${SHERPA_ONNX_ENABLE_WEBSOCKET}")
101 message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}") 101 message(STATUS "SHERPA_ONNX_ENABLE_GPU ${SHERPA_ONNX_ENABLE_GPU}")
102 102
103 -set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") 103 +if(NOT CMAKE_CXX_STANDARD)
  104 + set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
  105 +endif()
104 set(CMAKE_CXX_EXTENSIONS OFF) 106 set(CMAKE_CXX_EXTENSIONS OFF)
  107 +message(STATUS "C++ Standard version: ${CMAKE_CXX_STANDARD}")
105 108
106 include(CheckIncludeFileCXX) 109 include(CheckIncludeFileCXX)
107 check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA) 110 check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
1 function(download_kaldifst) 1 function(download_kaldifst)
2 include(FetchContent) 2 include(FetchContent)
3 3
4 - set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.6.tar.gz")  
5 - set(kaldifst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/kaldifst-1.7.6.tar.gz")  
6 - set(kaldifst_HASH "SHA256=79280c0bb08b5ed1a2ab7c21320a2b071f1f0eb10d2f047e8d6f027f0d32b4d2") 4 + set(kaldifst_URL "https://github.com/k2-fsa/kaldifst/archive/refs/tags/v1.7.8.tar.gz")
  5 + set(kaldifst_URL2 "https://huggingface.co/csukuangfj/kaldi-hmm-gmm-cmake-deps/resolve/main/kaldifst-1.7.8.tar.gz")
  6 + set(kaldifst_HASH "SHA256=94613923568ef9a240ba1059b8b9dfe3082daad794934635d99e66248a6687b5")
7 7
8 # If you don't have access to the Internet, 8 # If you don't have access to the Internet,
9 # please pre-download kaldifst 9 # please pre-download kaldifst
10 set(possible_file_locations 10 set(possible_file_locations
11 - $ENV{HOME}/Downloads/kaldifst-1.7.6.tar.gz  
12 - ${PROJECT_SOURCE_DIR}/kaldifst-1.7.6.tar.gz  
13 - ${PROJECT_BINARY_DIR}/kaldifst-1.7.6.tar.gz  
14 - /tmp/kaldifst-1.7.6.tar.gz  
15 - /star-fj/fangjun/download/github/kaldifst-1.7.6.tar.gz 11 + $ENV{HOME}/Downloads/kaldifst-1.7.8.tar.gz
  12 + ${PROJECT_SOURCE_DIR}/kaldifst-1.7.8.tar.gz
  13 + ${PROJECT_BINARY_DIR}/kaldifst-1.7.8.tar.gz
  14 + /tmp/kaldifst-1.7.8.tar.gz
  15 + /star-fj/fangjun/download/github/kaldifst-1.7.8.tar.gz
16 ) 16 )
17 17
18 foreach(f IN LISTS possible_file_locations) 18 foreach(f IN LISTS possible_file_locations)
@@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS) @@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS)
14 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") 14 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
15 endif() 15 endif()
16 16
17 -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-linux-aarch64-1.16.1.tgz") 17 +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-aarch64-1.16.1.tgz")
18 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-1.16.1.tgz") 18 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-aarch64-1.16.1.tgz")
19 set(onnxruntime_HASH "SHA256=f10851b62eb44f9e811134737e7c6edd15733d2c1549cb6ce403808e9c047385") 19 set(onnxruntime_HASH "SHA256=f10851b62eb44f9e811134737e7c6edd15733d2c1549cb6ce403808e9c047385")
20 20
@@ -18,7 +18,7 @@ if(NOT SHERPA_ONNX_ENABLE_GPU) @@ -18,7 +18,7 @@ if(NOT SHERPA_ONNX_ENABLE_GPU)
18 message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}") 18 message(FATAL_ERROR "This file is for NVIDIA GPU only. Given SHERPA_ONNX_ENABLE_GPU: ${SHERPA_ONNX_ENABLE_GPU}")
19 endif() 19 endif()
20 20
21 -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-linux-x64-gpu-1.16.1.tgz") 21 +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-x64-gpu-1.16.1.tgz")
22 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-gpu-1.16.1.tgz") 22 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-gpu-1.16.1.tgz")
23 set(onnxruntime_HASH "SHA256=474d5d74b588d54aa3e167f38acc9b1b8d20c292d0db92299bdc33a81eb4492d") 23 set(onnxruntime_HASH "SHA256=474d5d74b588d54aa3e167f38acc9b1b8d20c292d0db92299bdc33a81eb4492d")
24 24
@@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS) @@ -14,7 +14,7 @@ if(NOT BUILD_SHARED_LIBS)
14 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") 14 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
15 endif() 15 endif()
16 16
17 -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-linux-x64-1.16.1.tgz") 17 +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-linux-x64-1.16.1.tgz")
18 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-1.16.1.tgz") 18 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-linux-x64-1.16.1.tgz")
19 set(onnxruntime_HASH "SHA256=53a0f03f71587ed602e99e82773132fc634b74c2d227316fbfd4bf67181e72ed") 19 set(onnxruntime_HASH "SHA256=53a0f03f71587ed602e99e82773132fc634b74c2d227316fbfd4bf67181e72ed")
20 20
@@ -12,7 +12,7 @@ if(NOT BUILD_SHARED_LIBS) @@ -12,7 +12,7 @@ if(NOT BUILD_SHARED_LIBS)
12 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") 12 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
13 endif() 13 endif()
14 14
15 -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-osx-arm64-1.16.1.tgz") 15 +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-osx-arm64-1.16.1.tgz")
16 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-arm64-1.16.1.tgz") 16 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-arm64-1.16.1.tgz")
17 set(onnxruntime_HASH "SHA256=56ca6b8de3a220ea606c2067ba65d11dfa6e4f722e01ac7dc75f7152b81445e0") 17 set(onnxruntime_HASH "SHA256=56ca6b8de3a220ea606c2067ba65d11dfa6e4f722e01ac7dc75f7152b81445e0")
18 18
@@ -13,7 +13,7 @@ if(NOT BUILD_SHARED_LIBS) @@ -13,7 +13,7 @@ if(NOT BUILD_SHARED_LIBS)
13 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") 13 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
14 endif() 14 endif()
15 15
16 -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-osx-universal2-1.16.1.tgz") 16 +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-osx-universal2-1.16.1.tgz")
17 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-universal2-1.16.1.tgz") 17 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-universal2-1.16.1.tgz")
18 set(onnxruntime_HASH "SHA256=e8568a4a3f602c25ea7c3bbd2f085340dff5bb68fa7c859fd763d944105e3d76") 18 set(onnxruntime_HASH "SHA256=e8568a4a3f602c25ea7c3bbd2f085340dff5bb68fa7c859fd763d944105e3d76")
19 19
@@ -12,7 +12,7 @@ if(NOT BUILD_SHARED_LIBS) @@ -12,7 +12,7 @@ if(NOT BUILD_SHARED_LIBS)
12 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}") 12 message(FATAL_ERROR "This file is for building shared libraries. BUILD_SHARED_LIBS: ${BUILD_SHARED_LIBS}")
13 endif() 13 endif()
14 14
15 -set(onnxruntime_URL "https://github.com/csukuangfj/onnxruntime-libs/releases/download/v1.16.1/onnxruntime-osx-x86_64-1.16.1.tgz") 15 +set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.16.1/onnxruntime-osx-x86_64-1.16.1.tgz")
16 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-x86_64-1.16.1.tgz") 16 set(onnxruntime_URL2 "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-osx-x86_64-1.16.1.tgz")
17 set(onnxruntime_HASH "SHA256=0b8ae24401a8f75e1c4f75257d4eaeb1b6d44055e027df4aa4a84e67e0f9b9e3") 17 set(onnxruntime_HASH "SHA256=0b8ae24401a8f75e1c4f75257d4eaeb1b6d44055e027df4aa4a84e67e0f9b9e3")
18 18
@@ -14,30 +14,50 @@ @@ -14,30 +14,50 @@
14 #include "android/asset_manager_jni.h" 14 #include "android/asset_manager_jni.h"
15 #endif 15 #endif
16 16
  17 +#include "kaldifst/csrc/text-normalizer.h"
17 #include "sherpa-onnx/csrc/lexicon.h" 18 #include "sherpa-onnx/csrc/lexicon.h"
18 #include "sherpa-onnx/csrc/macros.h" 19 #include "sherpa-onnx/csrc/macros.h"
19 #include "sherpa-onnx/csrc/offline-tts-impl.h" 20 #include "sherpa-onnx/csrc/offline-tts-impl.h"
20 #include "sherpa-onnx/csrc/offline-tts-vits-model.h" 21 #include "sherpa-onnx/csrc/offline-tts-vits-model.h"
  22 +#include "sherpa-onnx/csrc/text-utils.h"
21 23
22 namespace sherpa_onnx { 24 namespace sherpa_onnx {
23 25
24 class OfflineTtsVitsImpl : public OfflineTtsImpl { 26 class OfflineTtsVitsImpl : public OfflineTtsImpl {
25 public: 27 public:
26 explicit OfflineTtsVitsImpl(const OfflineTtsConfig &config) 28 explicit OfflineTtsVitsImpl(const OfflineTtsConfig &config)
27 - : model_(std::make_unique<OfflineTtsVitsModel>(config.model)), 29 + : config_(config),
  30 + model_(std::make_unique<OfflineTtsVitsModel>(config.model)),
28 lexicon_(config.model.vits.lexicon, config.model.vits.tokens, 31 lexicon_(config.model.vits.lexicon, config.model.vits.tokens,
29 model_->Punctuations(), model_->Language(), config.model.debug, 32 model_->Punctuations(), model_->Language(), config.model.debug,
30 - model_->IsPiper()) {} 33 + model_->IsPiper()) {
  34 + if (!config.rule_fsts.empty()) {
  35 + std::vector<std::string> files;
  36 + SplitStringToVector(config.rule_fsts, ",", false, &files);
  37 + tn_list_.reserve(files.size());
  38 + for (const auto &f : files) {
  39 + if (config.model.debug) {
  40 + SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
  41 + }
  42 + tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
  43 + }
  44 + }
  45 + }
31 46
32 #if __ANDROID_API__ >= 9 47 #if __ANDROID_API__ >= 9
33 OfflineTtsVitsImpl(AAssetManager *mgr, const OfflineTtsConfig &config) 48 OfflineTtsVitsImpl(AAssetManager *mgr, const OfflineTtsConfig &config)
34 - : model_(std::make_unique<OfflineTtsVitsModel>(mgr, config.model)), 49 + : config_(config),
  50 + model_(std::make_unique<OfflineTtsVitsModel>(mgr, config.model)),
35 lexicon_(mgr, config.model.vits.lexicon, config.model.vits.tokens, 51 lexicon_(mgr, config.model.vits.lexicon, config.model.vits.tokens,
36 model_->Punctuations(), model_->Language(), config.model.debug, 52 model_->Punctuations(), model_->Language(), config.model.debug,
37 - model_->IsPiper()) {} 53 + model_->IsPiper()) {
  54 + if (!config.rule_fsts.empty()) {
  55 + SHERPA_ONNX_LOGE("TODO(fangjun): Implement rule FST for Android");
  56 + }
  57 + }
38 #endif 58 #endif
39 59
40 - GeneratedAudio Generate(const std::string &text, int64_t sid = 0, 60 + GeneratedAudio Generate(const std::string &_text, int64_t sid = 0,
41 float speed = 1.0) const override { 61 float speed = 1.0) const override {
42 int32_t num_speakers = model_->NumSpeakers(); 62 int32_t num_speakers = model_->NumSpeakers();
43 if (num_speakers == 0 && sid != 0) { 63 if (num_speakers == 0 && sid != 0) {
@@ -55,6 +75,20 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -55,6 +75,20 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
55 sid = 0; 75 sid = 0;
56 } 76 }
57 77
  78 + std::string text = _text;
  79 + if (config_.model.debug) {
  80 + SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
  81 + }
  82 +
  83 + if (!tn_list_.empty()) {
  84 + for (const auto &tn : tn_list_) {
  85 + text = tn->Normalize(text);
  86 + if (config_.model.debug) {
  87 + SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
  88 + }
  89 + }
  90 + }
  91 +
58 std::vector<int64_t> x = lexicon_.ConvertTextToTokenIds(text); 92 std::vector<int64_t> x = lexicon_.ConvertTextToTokenIds(text);
59 if (x.empty()) { 93 if (x.empty()) {
60 SHERPA_ONNX_LOGE("Failed to convert %s to token IDs", text.c_str()); 94 SHERPA_ONNX_LOGE("Failed to convert %s to token IDs", text.c_str());
@@ -98,7 +132,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -98,7 +132,9 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
98 } 132 }
99 133
100 private: 134 private:
  135 + OfflineTtsConfig config_;
101 std::unique_ptr<OfflineTtsVitsModel> model_; 136 std::unique_ptr<OfflineTtsVitsModel> model_;
  137 + std::vector<std::unique_ptr<kaldifst::TextNormalizer>> tn_list_;
102 Lexicon lexicon_; 138 Lexicon lexicon_;
103 }; 139 };
104 140
@@ -6,19 +6,44 @@ @@ -6,19 +6,44 @@
6 6
7 #include <string> 7 #include <string>
8 8
  9 +#include "sherpa-onnx/csrc/file-utils.h"
  10 +#include "sherpa-onnx/csrc/macros.h"
9 #include "sherpa-onnx/csrc/offline-tts-impl.h" 11 #include "sherpa-onnx/csrc/offline-tts-impl.h"
  12 +#include "sherpa-onnx/csrc/text-utils.h"
10 13
11 namespace sherpa_onnx { 14 namespace sherpa_onnx {
12 15
13 -void OfflineTtsConfig::Register(ParseOptions *po) { model.Register(po); } 16 +void OfflineTtsConfig::Register(ParseOptions *po) {
  17 + model.Register(po);
14 18
15 -bool OfflineTtsConfig::Validate() const { return model.Validate(); } 19 + po->Register("tts-rule-fsts", &rule_fsts,
  20 + "If not empty, it contains a list of rule FST filenames. "
  21 + "Multiple filenames are separated by a comma and they are "
  22 + "applied from left to right. An example value: "
  23 + "rule1.fst,rule2.fst,rule3.fst");
  24 +}
  25 +
  26 +bool OfflineTtsConfig::Validate() const {
  27 + if (!rule_fsts.empty()) {
  28 + std::vector<std::string> files;
  29 + SplitStringToVector(rule_fsts, ",", false, &files);
  30 + for (const auto &f : files) {
  31 + if (!FileExists(f)) {
  32 + SHERPA_ONNX_LOGE("Rule fst %s does not exist. ", f.c_str());
  33 + return false;
  34 + }
  35 + }
  36 + }
  37 +
  38 + return model.Validate();
  39 +}
16 40
17 std::string OfflineTtsConfig::ToString() const { 41 std::string OfflineTtsConfig::ToString() const {
18 std::ostringstream os; 42 std::ostringstream os;
19 43
20 os << "OfflineTtsConfig("; 44 os << "OfflineTtsConfig(";
21 - os << "model=" << model.ToString() << ")"; 45 + os << "model=" << model.ToString() << ", ";
  46 + os << "rule_fsts=\"" << rule_fsts << "\")";
22 47
23 return os.str(); 48 return os.str();
24 } 49 }
@@ -21,10 +21,17 @@ namespace sherpa_onnx { @@ -21,10 +21,17 @@ namespace sherpa_onnx {
21 21
22 struct OfflineTtsConfig { 22 struct OfflineTtsConfig {
23 OfflineTtsModelConfig model; 23 OfflineTtsModelConfig model;
  24 + // If not empty, it contains a list of rule FST filenames.
  25 + // Filenames are separated by a comma.
  26 + // Example value: rule1.fst,rule2.fst,rule3.fst
  27 + //
  28 + // If there are multiple rules, they are applied from left to right.
  29 + std::string rule_fsts;
24 30
25 OfflineTtsConfig() = default; 31 OfflineTtsConfig() = default;
26 - explicit OfflineTtsConfig(const OfflineTtsModelConfig &model)  
27 - : model(model) {} 32 + OfflineTtsConfig(const OfflineTtsModelConfig &model,
  33 + const std::string &rule_fsts)
  34 + : model(model), rule_fsts(rule_fsts) {}
28 35
29 void Register(ParseOptions *po); 36 void Register(ParseOptions *po);
30 bool Validate() const; 37 bool Validate() const;
@@ -3,6 +3,8 @@ @@ -3,6 +3,8 @@
3 // Copyright (c) 2023 Xiaomi Corporation 3 // Copyright (c) 2023 Xiaomi Corporation
4 #include "sherpa-onnx/python/csrc/offline-tts.h" 4 #include "sherpa-onnx/python/csrc/offline-tts.h"
5 5
  6 +#include <string>
  7 +
6 #include "sherpa-onnx/csrc/offline-tts.h" 8 #include "sherpa-onnx/csrc/offline-tts.h"
7 #include "sherpa-onnx/python/csrc/offline-tts-model-config.h" 9 #include "sherpa-onnx/python/csrc/offline-tts-model-config.h"
8 10
@@ -28,8 +30,10 @@ static void PybindOfflineTtsConfig(py::module *m) { @@ -28,8 +30,10 @@ static void PybindOfflineTtsConfig(py::module *m) {
28 using PyClass = OfflineTtsConfig; 30 using PyClass = OfflineTtsConfig;
29 py::class_<PyClass>(*m, "OfflineTtsConfig") 31 py::class_<PyClass>(*m, "OfflineTtsConfig")
30 .def(py::init<>()) 32 .def(py::init<>())
31 - .def(py::init<const OfflineTtsModelConfig &>(), py::arg("model")) 33 + .def(py::init<const OfflineTtsModelConfig &, const std::string &>(),
  34 + py::arg("model"), py::arg("rule_fsts") = "")
32 .def_readwrite("model", &PyClass::model) 35 .def_readwrite("model", &PyClass::model)
  36 + .def_readwrite("rule_fsts", &PyClass::rule_fsts)
33 .def("__str__", &PyClass::ToString); 37 .def("__str__", &PyClass::ToString);
34 } 38 }
35 39