Fangjun Kuang
Committed by GitHub

Add jieba for Chinese TTS models (#797)

... ... @@ -260,6 +260,7 @@ if(SHERPA_ONNX_ENABLE_TTS)
set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
include(piper-phonemize)
include(cppjieba) # For Chinese TTS. It is a header-only C++ library
endif()
add_subdirectory(sherpa-onnx)
... ...
# Download cppjieba (a header-only C++ word-segmentation library used for
# Chinese TTS) via FetchContent. A pre-downloaded tarball found in one of
# several well-known locations is preferred over the network so that
# offline builds keep working.
function(download_cppjieba)
include(FetchContent)
# Primary URL plus a mirror (URL2); the mirror is cleared when a local
# tarball is found below.
set(cppjieba_URL "https://github.com/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
set(cppjieba_URL2 "https://hub.nuaa.cf/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
set(cppjieba_HASH "SHA256=03e5264687f0efaef05487a07d49c3f4c0f743347bfbf825df4b30cc75ac5288")
# If you don't have access to the Internet,
# please pre-download cppjieba
set(possible_file_locations
$ENV{HOME}/Downloads/cppjieba-sherpa-onnx-2024-04-19.tar.gz
${CMAKE_SOURCE_DIR}/cppjieba-sherpa-onnx-2024-04-19.tar.gz
${CMAKE_BINARY_DIR}/cppjieba-sherpa-onnx-2024-04-19.tar.gz
/tmp/cppjieba-sherpa-onnx-2024-04-19.tar.gz
/star-fj/fangjun/download/github/cppjieba-sherpa-onnx-2024-04-19.tar.gz
)
foreach(f IN LISTS possible_file_locations)
if(EXISTS ${f})
# Use the local copy instead of downloading; URL_HASH below still
# verifies its integrity.
set(cppjieba_URL "${f}")
file(TO_CMAKE_PATH "${cppjieba_URL}" cppjieba_URL)
message(STATUS "Found local downloaded cppjieba: ${cppjieba_URL}")
set(cppjieba_URL2)
break()
endif()
endforeach()
FetchContent_Declare(cppjieba
URL
${cppjieba_URL}
${cppjieba_URL2}
URL_HASH
${cppjieba_HASH}
)
FetchContent_GetProperties(cppjieba)
if(NOT cppjieba_POPULATED)
message(STATUS "Downloading cppjieba ${cppjieba_URL}")
FetchContent_Populate(cppjieba)
endif()
message(STATUS "cppjieba is downloaded to ${cppjieba_SOURCE_DIR}")
# EXCLUDE_FROM_ALL: cppjieba targets are built only when something links
# against them.
add_subdirectory(${cppjieba_SOURCE_DIR} ${cppjieba_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()
download_cppjieba()
... ...
... ... @@ -132,6 +132,7 @@ list(APPEND sources
if(SHERPA_ONNX_ENABLE_TTS)
list(APPEND sources
jieba-lexicon.cc
lexicon.cc
offline-tts-character-frontend.cc
offline-tts-impl.cc
... ... @@ -184,6 +185,7 @@ endif()
if(SHERPA_ONNX_ENABLE_TTS)
target_link_libraries(sherpa-onnx-core piper_phonemize)
target_link_libraries(sherpa-onnx-core fstfar fst)
target_link_libraries(sherpa-onnx-core cppjieba)
endif()
if(SHERPA_ONNX_ENABLE_CHECK)
... ... @@ -491,6 +493,7 @@ if(SHERPA_ONNX_ENABLE_TESTS)
)
if(SHERPA_ONNX_ENABLE_TTS)
list(APPEND sherpa_onnx_test_srcs
cppjieba-test.cc
piper-phonemize-test.cc
)
endif()
... ...
// sherpa-onnx/csrc/cppjieba-test.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <iostream>
#include <regex> // NOLINT
#include <string>
#include <vector>
#include "cppjieba/Jieba.hpp"
#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
namespace sherpa_onnx {
// Please download dict files from
// https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
// and extract the archive into the current working directory before running
// the tests below; the tests are skipped (not failed) when the files are
// missing.
const char *const kDictPath = "./dict/jieba.dict.utf8";       // main dictionary
const char *const kHmmPath = "./dict/hmm_model.utf8";         // HMM model for OOV words
const char *const kUserDictPath = "./dict/user.dict.utf8";    // user-provided entries
const char *const kIdfPath = "./dict/idf.utf8";               // IDF weights (keyword extraction)
const char *const kStopWordPath = "./dict/stop_words.utf8";   // stop-word list
// Smoke test for the bundled cppjieba: exercises Cut (with HMM),
// CutForSearch, user-dictionary insertion, and offset-aware segmentation.
// Skipped when the dict files have not been downloaded (see kDictPath).
TEST(CppJieBa, Case1) {
  if (!FileExists(kDictPath)) {
    SHERPA_ONNX_LOGE("%s does not exist. Skipping test", kDictPath);
    return;
  }
  cppjieba::Jieba jieba(kDictPath, kHmmPath, kUserDictPath, kIdfPath,
                        kStopWordPath);
  std::vector<std::string> words;
  std::vector<cppjieba::Word> jiebawords;
  std::string s = "他来到了网易杭研大厦";
  std::cout << s << std::endl;
  std::cout << "[demo] Cut With HMM" << std::endl;
  // true: enable the HMM model so out-of-vocabulary words are segmented too
  jieba.Cut(s, words, true);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  /*
  他来到了网易杭研大厦
  [demo] Cut With HMM
  他/来到/了/网易/杭研/大厦
  */
  s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
  std::cout << s << std::endl;
  std::cout << "[demo] CutForSearch" << std::endl;
  // CutForSearch emits overlapping sub-words (useful for search indexing)
  jieba.CutForSearch(s, words);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  /*
  小明硕士毕业于中国科学院计算所,后在日本京都大学深造
  [demo] CutForSearch
  小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/,/后/在/日本/京都/大学/日本京都大学/深造
  */
  std::cout << "[demo] Insert User Word" << std::endl;
  // Before insertion the word is split; after InsertUserWord it is kept whole
  jieba.Cut("男默女泪", words);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  jieba.InsertUserWord("男默女泪");
  jieba.Cut("男默女泪", words);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  /*
  [demo] Insert User Word
  男默/女泪
  男默女泪
  */
  std::cout << "[demo] CutForSearch Word With Offset" << std::endl;
  // The cppjieba::Word overload also reports each word's byte offset
  jieba.CutForSearch(s, jiebawords, true);
  std::cout << jiebawords << std::endl;
  /*
  [demo] CutForSearch Word With Offset
  [{"word": "小明", "offset": 0}, {"word": "硕士", "offset": 6}, {"word": "毕业",
  "offset": 12}, {"word": "于", "offset": 18}, {"word": "中国", "offset": 21},
  {"word": "科学", "offset": 27}, {"word": "学院", "offset": 30}, {"word":
  "科学院", "offset": 27}, {"word": "中国科学院", "offset": 21}, {"word": "计算",
  "offset": 36}, {"word": "计算所", "offset": 36}, {"word": ",", "offset": 45},
  {"word": "后", "offset": 48}, {"word": "在", "offset": 51}, {"word": "日本",
  "offset": 54}, {"word": "京都", "offset": 60}, {"word": "大学", "offset": 66},
  {"word": "日本京都大学", "offset": 54}, {"word": " 深造", "offset": 72}]
  */
  // see more test at
  // https://github.com/yanyiwu/cppjieba/blob/master/test/demo.cpp
}
// Exercises HMM-based segmentation on a longer sentence and verifies that
// punctuation normalization (ASCII/halfwidth -> fullwidth) composes with
// jieba.Cut. Skipped when the dict files are absent (see kDictPath).
TEST(CppJieBa, Case2) {
  if (!FileExists(kDictPath)) {
    SHERPA_ONNX_LOGE("%s does not exist. Skipping test", kDictPath);
    return;
  }

  cppjieba::Jieba jieba(kDictPath, kHmmPath, kUserDictPath, kIdfPath,
                        kStopWordPath);

  // Join the segmented words with '_' and print them on one line.
  auto print_joined = [](const std::vector<std::string> &segments) {
    std::ostringstream os;
    std::string sep = "";
    for (const auto &w : segments) {
      os << sep << w;
      sep = "_";
    }
    std::cout << os.str() << "\n";
  };

  std::string s =
      "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
      "涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
      "受着生命的奇迹与温柔";

  std::vector<std::string> words;
  bool is_hmm = true;
  jieba.Cut(s, words, is_hmm);
  print_joined(words);
  /*
  当_夜幕降临_,_星光点点_,_伴随_着_微风_拂面_,
  _我_在_静谧_中_感受_着_时光_的_流转_,
  _思念_如_涟漪_荡漾_,_梦境_如_画卷_展开_,_我_与_自然_融为一体_,
  _沉静_在_这_片_宁静_的_美丽_之中_,_感受_着_生命_的_奇迹_与_温柔
  */

  // Normalize punctuation to fullwidth forms before segmenting again.
  s = "这里有:红的、绿的、蓝的;各种各样的颜色都有!你想要什么呢?测试.";
  std::string s2 = std::regex_replace(s, std::regex(":|、|;"), ",");
  s2 = std::regex_replace(s2, std::regex("[.]"), "。");
  s2 = std::regex_replace(s2, std::regex("[?]"), "?");
  s2 = std::regex_replace(s2, std::regex("[!]"), "!");
  std::cout << s << "\n" << s2 << "\n";

  words.clear();
  jieba.Cut(s2, words, is_hmm);
  print_joined(words);
}
} // namespace sherpa_onnx
... ...
... ... @@ -18,6 +18,7 @@ bool FileExists(const std::string &filename) {
// Terminates the process when `filename` does not exist on disk.
// Logs at FATAL severity first; the explicit exit(-1) is a safety net in
// case the FATAL logger does not abort by itself.
void AssertFileExists(const std::string &filename) {
  if (FileExists(filename)) {
    return;
  }
  SHERPA_ONNX_LOG(FATAL) << filename << " does not exist!";
  exit(-1);
}
... ...
// sherpa-onnx/csrc/jieba-lexicon.cc
//
// Copyright (c) 2022-2024 Xiaomi Corporation
#include "sherpa-onnx/csrc/jieba-lexicon.h"
#include <fstream>
#include <regex> // NOLINT
#include <utility>
#include "cppjieba/Jieba.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"
namespace sherpa_onnx {
// implemented in ./lexicon.cc
std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is);
std::vector<int32_t> ConvertTokensToIds(
const std::unordered_map<std::string, int32_t> &token2id,
const std::vector<std::string> &tokens);
// Implementation of JiebaLexicon.
//
// Pipeline: normalize punctuation -> segment the text with cppjieba ->
// map each word to token IDs using lexicon.txt/tokens.txt -> split the
// ID stream into sentences at fullwidth punctuation.
class JiebaLexicon::Impl {
 public:
  // @param lexicon   Path to lexicon.txt; each line is "word token1 token2 ..."
  // @param tokens    Path to tokens.txt; each line maps a token to its ID
  // @param dict_dir  Directory containing the 5 jieba dict files listed below
  // @param meta_data Model metadata (stored; not read by this class yet)
  // @param debug     If true, log intermediate results of the conversion
  Impl(const std::string &lexicon, const std::string &tokens,
       const std::string &dict_dir,
       const OfflineTtsVitsModelMetaData &meta_data, bool debug)
      : meta_data_(meta_data), debug_(debug) {
    std::string dict = dict_dir + "/jieba.dict.utf8";
    std::string hmm = dict_dir + "/hmm_model.utf8";
    std::string user_dict = dict_dir + "/user.dict.utf8";
    std::string idf = dict_dir + "/idf.utf8";
    std::string stop_word = dict_dir + "/stop_words.utf8";

    // Fail fast with a clear message if any dict file is missing.
    AssertFileExists(dict);
    AssertFileExists(hmm);
    AssertFileExists(user_dict);
    AssertFileExists(idf);
    AssertFileExists(stop_word);

    jieba_ =
        std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);

    {
      std::ifstream is(tokens);
      InitTokens(is);
    }

    {
      std::ifstream is(lexicon);
      InitLexicon(is);
    }
  }

  // Converts `text` into a list of sentences, each a list of token IDs.
  // A sentence boundary is emitted after 。, !, ? or ,.
  std::vector<std::vector<int64_t>> ConvertTextToTokenIds(
      const std::string &text) const {
    // Normalize halfwidth/alternative punctuation to the fullwidth forms
    // understood by the sentence splitter below. See
    // https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
    std::regex punct_re{":|、|;"};
    std::string s = std::regex_replace(text, punct_re, ",");

    std::regex punct_re2("[.]");
    s = std::regex_replace(s, punct_re2, "。");

    std::regex punct_re3("[?]");
    s = std::regex_replace(s, punct_re3, "?");

    std::regex punct_re4("[!]");
    s = std::regex_replace(s, punct_re4, "!");

    std::vector<std::string> words;
    bool is_hmm = true;
    // Fix: segment the normalized string `s`, not the original `text`.
    // Cutting `text` discarded the punctuation replacements above, so
    // e.g. "." never became "。" and never triggered a sentence break.
    jieba_->Cut(s, words, is_hmm);

    if (debug_) {
      SHERPA_ONNX_LOGE("input text: %s", text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());

      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

      SHERPA_ONNX_LOGE("after jieba processing: %s", os.str().c_str());
    }

    std::vector<std::vector<int64_t>> ans;
    std::vector<int64_t> this_sentence;

    // A blank token is inserted between words.
    int32_t blank = token2id_.at(" ");
    for (const auto &w : words) {
      auto ids = ConvertWordToIds(w);
      if (ids.empty()) {
        SHERPA_ONNX_LOGE("Ignore OOV '%s'", w.c_str());
        continue;
      }

      this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
      this_sentence.push_back(blank);

      // Close the current sentence at fullwidth punctuation.
      if (w == "。" || w == "!" || w == "?" || w == ",") {
        ans.push_back(std::move(this_sentence));
      }
    }  // for (const auto &w : words)

    // Flush any trailing words that were not followed by punctuation.
    if (!this_sentence.empty()) {
      ans.push_back(std::move(this_sentence));
    }

    return ans;
  }

 private:
  // Maps a single word to token IDs:
  //  1. whole-word lookup in the lexicon,
  //  2. single-token lookup in tokens.txt,
  //  3. fall back to per-character lookup (characters missing from the
  //     lexicon are silently dropped).
  std::vector<int32_t> ConvertWordToIds(const std::string &w) const {
    if (word2ids_.count(w)) {
      return word2ids_.at(w);
    }

    if (token2id_.count(w)) {
      return {token2id_.at(w)};
    }

    std::vector<int32_t> ans;

    std::vector<std::string> words = SplitUtf8(w);
    for (const auto &word : words) {
      if (word2ids_.count(word)) {
        auto ids = ConvertWordToIds(word);
        ans.insert(ans.end(), ids.begin(), ids.end());
      }
    }

    return ans;
  }

  void InitTokens(std::istream &is) {
    token2id_ = ReadTokens(is);

    // Alias halfwidth punctuation to the ID of its fullwidth counterpart
    // so both spellings map to the same token.
    std::vector<std::pair<std::string, std::string>> puncts = {
        {",", ","}, {".", "。"}, {"!", "!"}, {"?", "?"}};

    for (const auto &p : puncts) {
      if (token2id_.count(p.first) && !token2id_.count(p.second)) {
        token2id_[p.second] = token2id_[p.first];
      }
    }
  }

  // Parses lexicon.txt. Each line is "word token1 token2 ...". Duplicated
  // words and words whose tokens cannot all be mapped to IDs are skipped.
  void InitLexicon(std::istream &is) {
    std::string word;
    std::vector<std::string> token_list;
    std::string line;
    std::string phone;
    int32_t line_num = 0;

    while (std::getline(is, line)) {
      ++line_num;

      std::istringstream iss(line);
      token_list.clear();

      iss >> word;
      ToLowerCase(&word);

      if (word2ids_.count(word)) {
        SHERPA_ONNX_LOGE("Duplicated word: %s at line %d:%s. Ignore it.",
                         word.c_str(), line_num, line.c_str());
        continue;
      }

      while (iss >> phone) {
        token_list.push_back(std::move(phone));
      }

      std::vector<int32_t> ids = ConvertTokensToIds(token2id_, token_list);
      if (ids.empty()) {
        continue;
      }

      word2ids_.insert({std::move(word), std::move(ids)});
    }
  }

 private:
  // lexicon.txt is saved in word2ids_
  std::unordered_map<std::string, std::vector<int32_t>> word2ids_;

  // tokens.txt is saved in token2id_
  std::unordered_map<std::string, int32_t> token2id_;

  OfflineTtsVitsModelMetaData meta_data_;

  std::unique_ptr<cppjieba::Jieba> jieba_;
  bool debug_ = false;
};
// Out-of-line so that ~Impl is visible where unique_ptr<Impl> is destroyed.
JiebaLexicon::~JiebaLexicon() = default;

// See jieba-lexicon.h for the meaning of the arguments; everything is
// forwarded to the pimpl.
JiebaLexicon::JiebaLexicon(const std::string &lexicon,
                           const std::string &tokens,
                           const std::string &dict_dir,
                           const OfflineTtsVitsModelMetaData &meta_data,
                           bool debug)
    : impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, meta_data,
                                   debug)) {}

// The voice argument is ignored; it exists only to satisfy the
// OfflineTtsFrontend interface.
std::vector<std::vector<int64_t>> JiebaLexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string &unused_voice /*= ""*/) const {
  return impl_->ConvertTextToTokenIds(text);
}
} // namespace sherpa_onnx
... ...
// sherpa-onnx/csrc/jieba-lexicon.h
//
// Copyright (c) 2022-2024 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_JIEBA_LEXICON_H_
#define SHERPA_ONNX_CSRC_JIEBA_LEXICON_H_
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h"
namespace sherpa_onnx {
// Text frontend for Chinese TTS models that use cppjieba for word
// segmentation (e.g., models from VITS-fast-fine-tuning).
class JiebaLexicon : public OfflineTtsFrontend {
 public:
  ~JiebaLexicon() override;

  // @param lexicon   Path to lexicon.txt (word -> tokens)
  // @param tokens    Path to tokens.txt (token -> ID)
  // @param dict_dir  Directory containing the jieba dict files
  // @param meta_data Metadata parsed from the VITS model
  // @param debug     Print intermediate conversion results when true
  JiebaLexicon(const std::string &lexicon, const std::string &tokens,
               const std::string &dict_dir,
               const OfflineTtsVitsModelMetaData &meta_data, bool debug);

#if __ANDROID_API__ >= 9
  // Same as above, but reads files through the Android asset manager.
  // NOTE(review): this overload takes no `debug` flag — confirm whether the
  // asymmetry with the non-Android constructor is intentional.
  JiebaLexicon(AAssetManager *mgr, const std::string &lexicon,
               const std::string &tokens, const std::string &dict_dir,
               const OfflineTtsVitsModelMetaData &meta_data);
#endif

  // Converts text into sentences of token IDs; `unused_voice` is ignored
  // and exists only to satisfy the OfflineTtsFrontend interface.
  std::vector<std::vector<int64_t>> ConvertTextToTokenIds(
      const std::string &text,
      const std::string &unused_voice = "") const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_JIEBA_LEXICON_H_
... ...
... ... @@ -76,7 +76,7 @@ static std::vector<std::string> ProcessHeteronyms(
// Note: We don't use SymbolTable here since tokens may contain a blank
// in the first column
static std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is) {
std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is) {
std::unordered_map<std::string, int32_t> token2id;
std::string line;
... ... @@ -113,7 +113,7 @@ static std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is) {
return token2id;
}
static std::vector<int32_t> ConvertTokensToIds(
std::vector<int32_t> ConvertTokensToIds(
const std::unordered_map<std::string, int32_t> &token2id,
const std::vector<std::string> &tokens) {
std::vector<int32_t> ids;
... ...
... ... @@ -19,6 +19,7 @@
#include "fst/extensions/far/far.h"
#include "kaldifst/csrc/kaldi-fst-io.h"
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/jieba-lexicon.h"
#include "sherpa-onnx/csrc/lexicon.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-tts-character-frontend.h"
... ... @@ -290,9 +291,26 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
void InitFrontend() {
const auto &meta_data = model_->GetMetaData();
if (meta_data.jieba && config_.model.vits.dict_dir.empty()) {
SHERPA_ONNX_LOGE(
"Please provide --vits-dict-dir for Chinese TTS models using jieba");
exit(-1);
}
if (!meta_data.jieba && !config_.model.vits.dict_dir.empty()) {
SHERPA_ONNX_LOGE(
"Current model is not using jieba but you provided --vits-dict-dir");
exit(-1);
}
if (meta_data.frontend == "characters") {
frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
config_.model.vits.tokens, meta_data);
} else if (meta_data.jieba && !config_.model.vits.dict_dir.empty()) {
frontend_ = std::make_unique<JiebaLexicon>(
config_.model.vits.lexicon, config_.model.vits.tokens,
config_.model.vits.dict_dir, model_->GetMetaData(),
config_.model.debug);
} else if ((meta_data.is_piper || meta_data.is_coqui ||
meta_data.is_icefall) &&
!config_.model.vits.data_dir.empty()) {
... ...
... ... @@ -4,6 +4,8 @@
#include "sherpa-onnx/csrc/offline-tts-vits-model-config.h"
#include <vector>
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
... ... @@ -16,6 +18,9 @@ void OfflineTtsVitsModelConfig::Register(ParseOptions *po) {
po->Register("vits-data-dir", &data_dir,
"Path to the directory containing dict for espeak-ng. If it is "
"given, --vits-lexicon is ignored.");
po->Register("vits-dict-dir", &dict_dir,
"Path to the directory containing dict for jieba. Used only for "
"Chinese TTS models using jieba");
po->Register("vits-noise-scale", &noise_scale, "noise_scale for VITS models");
po->Register("vits-noise-scale-w", &noise_scale_w,
"noise_scale_w for VITS models");
... ... @@ -64,12 +69,24 @@ bool OfflineTtsVitsModelConfig::Validate() const {
}
if (!FileExists(data_dir + "/intonations")) {
SHERPA_ONNX_LOGE("%s/intonations does not exist. Skipping test",
data_dir.c_str());
SHERPA_ONNX_LOGE("%s/intonations does not exist.", data_dir.c_str());
return false;
}
}
// When a jieba dict directory is given, verify that all five required
// dictionary files are present.
if (!dict_dir.empty()) {
  std::vector<std::string> required_files = {
      "jieba.dict.utf8", "hmm_model.utf8", "user.dict.utf8",
      "idf.utf8",        "stop_words.utf8",
  };

  for (const auto &f : required_files) {
    if (!FileExists(dict_dir + "/" + f)) {
      // Fix: report the directory actually being checked (dict_dir);
      // the original message printed data_dir by mistake.
      SHERPA_ONNX_LOGE("%s/%s does not exist.", dict_dir.c_str(), f.c_str());
      return false;
    }
  }
}
return true;
}
... ... @@ -81,6 +98,7 @@ std::string OfflineTtsVitsModelConfig::ToString() const {
os << "lexicon=\"" << lexicon << "\", ";
os << "tokens=\"" << tokens << "\", ";
os << "data_dir=\"" << data_dir << "\", ";
os << "dict_dir=\"" << dict_dir << "\", ";
os << "noise_scale=" << noise_scale << ", ";
os << "noise_scale_w=" << noise_scale_w << ", ";
os << "length_scale=" << length_scale << ")";
... ...
... ... @@ -20,6 +20,9 @@ struct OfflineTtsVitsModelConfig {
// data_dir is for piper-phonemize, which uses espeak-ng
std::string data_dir;
// Used for Chinese TTS models using jieba
std::string dict_dir;
float noise_scale = 0.667;
float noise_scale_w = 0.8;
float length_scale = 1;
... ... @@ -33,12 +36,14 @@ struct OfflineTtsVitsModelConfig {
const std::string &lexicon,
const std::string &tokens,
const std::string &data_dir,
const std::string &dict_dir,
float noise_scale = 0.667,
float noise_scale_w = 0.8, float length_scale = 1)
: model(model),
lexicon(lexicon),
tokens(tokens),
data_dir(data_dir),
dict_dir(dict_dir),
noise_scale(noise_scale),
noise_scale_w(noise_scale_w),
length_scale(length_scale) {}
... ...
... ... @@ -22,6 +22,10 @@ struct OfflineTtsVitsModelMetaData {
bool is_coqui = false;
bool is_icefall = false;
// for Chinese TTS models from
// https://github.com/Plachtaa/VITS-fast-fine-tuning
int32_t jieba = 0;
// the following options are for models from coqui-ai/TTS
int32_t blank_id = 0;
int32_t bos_id = 0;
... ...
... ... @@ -93,6 +93,7 @@ class OfflineTtsVitsModel::Impl {
SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.frontend, "frontend",
"");
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.jieba, "jieba", 0);
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.blank_id, "blank_id", 0);
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.bos_id, "bos_id", 0);
SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.eos_id, "eos_id", 0);
... ...
... ... @@ -16,15 +16,17 @@ void PybindOfflineTtsVitsModelConfig(py::module *m) {
py::class_<PyClass>(*m, "OfflineTtsVitsModelConfig")
.def(py::init<>())
.def(py::init<const std::string &, const std::string &,
const std::string &, const std::string &, float, float,
float>(),
const std::string &, const std::string &,
const std::string &, float, float, float>(),
py::arg("model"), py::arg("lexicon"), py::arg("tokens"),
py::arg("data_dir") = "", py::arg("noise_scale") = 0.667,
py::arg("noise_scale_w") = 0.8, py::arg("length_scale") = 1.0)
py::arg("data_dir") = "", py::arg("dict_dir") = "",
py::arg("noise_scale") = 0.667, py::arg("noise_scale_w") = 0.8,
py::arg("length_scale") = 1.0)
.def_readwrite("model", &PyClass::model)
.def_readwrite("lexicon", &PyClass::lexicon)
.def_readwrite("tokens", &PyClass::tokens)
.def_readwrite("data_dir", &PyClass::data_dir)
.def_readwrite("dict_dir", &PyClass::dict_dir)
.def_readwrite("noise_scale", &PyClass::noise_scale)
.def_readwrite("noise_scale_w", &PyClass::noise_scale_w)
.def_readwrite("length_scale", &PyClass::length_scale)
... ...