Fangjun Kuang
Committed by GitHub

Add MFC TTS example on Windows (#378)

正在显示 29 个修改的文件 包含 994 行增加22 行删除
... ... @@ -186,7 +186,7 @@ int32_t main(int32_t argc, char *argv[]) {
SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
const SherpaOnnxGeneratedAudio *audio =
SherpaOnnxOfflineTtsGenerate(tts, text, sid);
SherpaOnnxOfflineTtsGenerate(tts, text, sid, 1.0);
SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
... ...
... ... @@ -49,7 +49,7 @@ func main() {
log.Println("Start generating!")
audio := tts.Generate(text, sid)
audio := tts.Generate(text, sid, 1.0)
log.Println("Done!")
... ...
// NonStreamingTextToSpeech.cpp : Defines the class behaviors for the application.
//
#include "pch.h"
#include "framework.h"
#include "NonStreamingTextToSpeech.h"
#include "NonStreamingTextToSpeechDlg.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// CNonStreamingTextToSpeechApp
// Message map of the application object: the ID_HELP command is routed to
// the CWinApp::OnHelp handler.
BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechApp, CWinApp)
ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
END_MESSAGE_MAP()
// CNonStreamingTextToSpeechApp construction
// Default constructor. The body is intentionally empty; the application's
// start-up work is done in InitInstance().
CNonStreamingTextToSpeechApp::CNonStreamingTextToSpeechApp()
{
// TODO: add construction code here,
// Place all significant initialization in InitInstance
}
// The one and only CNonStreamingTextToSpeechApp object.
// It is declared `extern` in NonStreamingTextToSpeech.h so that other
// translation units can refer to it.
CNonStreamingTextToSpeechApp theApp;
// CNonStreamingTextToSpeechApp initialization
// Application start-up: shows the main dialog modally and, once it has been
// dismissed, returns FALSE so that the application exits instead of entering
// the message pump.
BOOL CNonStreamingTextToSpeechApp::InitInstance()
{
  CWinApp::InitInstance();

  // A shell manager is needed in case the dialog hosts shell tree view or
  // shell list view controls. It is released again before returning.
  CShellManager *shellManager = new CShellManager;

  // Select the "Windows Native" visual manager so MFC controls are themed.
  CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));

  // Standard initialization.
  // Registry key under which the application's settings are stored.
  // TODO: You should modify this string to be something appropriate
  // such as the name of your company or organization
  SetRegistryKey(_T("Local AppWizard-Generated Applications"));

  CNonStreamingTextToSpeechDlg mainDialog;
  m_pMainWnd = &mainDialog;

  const INT_PTR dialogResult = mainDialog.DoModal();
  switch (dialogResult)
  {
  case IDOK:
    // TODO: Place code here to handle when the dialog is
    // dismissed with OK
    break;

  case IDCANCEL:
    // TODO: Place code here to handle when the dialog is
    // dismissed with Cancel
    break;

  case -1:
    TRACE(traceAppMsg, 0, "Warning: dialog creation failed, so application is terminating unexpectedly.\n");
    TRACE(traceAppMsg, 0, "Warning: if you are using MFC controls on the dialog, you cannot #define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
    break;

  default:
    break;
  }

  // Release the shell manager created above.
  if (shellManager != nullptr)
  {
    delete shellManager;
  }

#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  ControlBarCleanUp();
#endif

  // The dialog has been closed: returning FALSE exits the application
  // rather than starting its message pump.
  return FALSE;
}
... ...
// NonStreamingTextToSpeech.h : main header file for the PROJECT_NAME application
//
#pragma once
#ifndef __AFXWIN_H__
#error "include 'pch.h' before including this file for PCH"
#endif
#include "resource.h" // main symbols
// CNonStreamingTextToSpeechApp:
// See NonStreamingTextToSpeech.cpp for the implementation of this class
//
// Application class of the non-streaming text-to-speech MFC example.
// A single global instance (theApp) is defined in NonStreamingTextToSpeech.cpp.
class CNonStreamingTextToSpeechApp : public CWinApp
{
public:
// Default constructor; the real start-up work happens in InitInstance().
CNonStreamingTextToSpeechApp();
// Overrides
public:
// Shows the main dialog modally and returns FALSE after it has been closed,
// so the application exits without starting the message pump.
virtual BOOL InitInstance();
// Implementation
DECLARE_MESSAGE_MAP()
};
extern CNonStreamingTextToSpeechApp theApp;
... ...
// Microsoft Visual C++ generated resource script.
... ...
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>17.0</VCProjectVersion>
<ProjectGuid>{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}</ProjectGuid>
<Keyword>MFCProj</Keyword>
<RootNamespace>NonStreamingTextToSpeech</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Dynamic</UseOfMfc>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Dynamic</UseOfMfc>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Dynamic</UseOfMfc>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
<UseOfMfc>Static</UseOfMfc>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
<Import Project="sherpa-onnx-deps.props" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
<Midl>
<MkTypLibCompatible>false</MkTypLibCompatible>
<ValidateAllParameters>true</ValidateAllParameters>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>0x0409</Culture>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude Include="framework.h" />
<ClInclude Include="NonStreamingTextToSpeech.h" />
<ClInclude Include="NonStreamingTextToSpeechDlg.h" />
<ClInclude Include="pch.h" />
<ClInclude Include="Resource.h" />
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="NonStreamingTextToSpeech.cpp" />
<ClCompile Include="NonStreamingTextToSpeechDlg.cpp" />
<ClCompile Include="pch.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="NonStreamingTextToSpeech.rc" />
</ItemGroup>
<ItemGroup>
<None Include="res\NonStreamingTextToSpeech.rc2" />
</ItemGroup>
<ItemGroup>
<Image Include="res\NonStreamingTextToSpeech.ico" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
\ No newline at end of file
... ...
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="NonStreamingTextToSpeech.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="NonStreamingTextToSpeechDlg.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="framework.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="targetver.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Resource.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="pch.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="NonStreamingTextToSpeech.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="NonStreamingTextToSpeechDlg.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="pch.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="NonStreamingTextToSpeech.rc">
<Filter>Resource Files</Filter>
</ResourceCompile>
</ItemGroup>
<ItemGroup>
<None Include="res\NonStreamingTextToSpeech.rc2">
<Filter>Resource Files</Filter>
</None>
</ItemGroup>
<ItemGroup>
<Image Include="res\NonStreamingTextToSpeech.ico">
<Filter>Resource Files</Filter>
</Image>
</ItemGroup>
</Project>
\ No newline at end of file
... ...
// NonStreamingTextToSpeechDlg.cpp : implementation file
//
#include "pch.h"
#include "framework.h"
#include "NonStreamingTextToSpeech.h"
#include "NonStreamingTextToSpeechDlg.h"
#include "afxdialogex.h"
#include <fstream>
#include <stdexcept>
#include <string>
#include <vector>
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// CAboutDlg dialog used for App About
// "About" dialog of the application. It is opened from
// CNonStreamingTextToSpeechDlg::OnSysCommand when the IDM_ABOUTBOX system
// menu entry is chosen.
class CAboutDlg : public CDialogEx
{
public:
// Constructs the dialog from the IDD_ABOUTBOX dialog template.
CAboutDlg();
// Dialog Data
#ifdef AFX_DESIGN_TIME
enum { IDD = IDD_ABOUTBOX };
#endif
protected:
virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
// Implementation
protected:
DECLARE_MESSAGE_MAP()
};
// Constructs the About dialog from the IDD_ABOUTBOX dialog template.
CAboutDlg::CAboutDlg() : CDialogEx(IDD_ABOUTBOX)
{
}
// DDX/DDV support. The About dialog binds no controls of its own, so this
// only forwards to the base class implementation.
void CAboutDlg::DoDataExchange(CDataExchange* pDX)
{
CDialogEx::DoDataExchange(pDX);
}
// Message map of the About dialog; it registers no handlers of its own.
BEGIN_MESSAGE_MAP(CAboutDlg, CDialogEx)
END_MESSAGE_MAP()
// CNonStreamingTextToSpeechDlg dialog
// Converts a UTF-8 encoded string into UTF-16 code units stored in a
// std::wstring.
//
// Adapted from
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
//
// @param utf8  Input bytes; must be well-formed UTF-8.
// @return      The UTF-16 code units. Code points above U+FFFF are emitted
//              as a high/low surrogate pair.
// @throws std::logic_error  If the input is not well-formed UTF-8: an
//              invalid lead byte, a missing or invalid continuation byte, an
//              overlong encoding, an encoded surrogate (U+D800..U+DFFF), or
//              a code point above U+10FFFF.
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  std::wstring utf16;
  // Every UTF-8 sequence yields at most as many UTF-16 units as it has bytes.
  utf16.reserve(utf8.size());

  size_t i = 0;
  while (i < utf8.size()) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i++]);

    unsigned long uni = 0;        // code point being decoded
    size_t todo = 0;              // number of continuation bytes expected
    unsigned long min_value = 0;  // smallest code point valid for this length

    if (lead <= 0x7F) {
      uni = lead;
    } else if (lead <= 0xBF) {
      // A continuation byte cannot start a sequence.
      throw std::logic_error("not a UTF-8 string");
    } else if (lead <= 0xDF) {
      uni = lead & 0x1F;
      todo = 1;
      min_value = 0x80;
    } else if (lead <= 0xEF) {
      uni = lead & 0x0F;
      todo = 2;
      min_value = 0x800;
    } else if (lead <= 0xF7) {
      uni = lead & 0x07;
      todo = 3;
      min_value = 0x10000;
    } else {
      throw std::logic_error("not a UTF-8 string");
    }

    for (size_t j = 0; j < todo; ++j) {
      if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
      const unsigned char cont = static_cast<unsigned char>(utf8[i++]);
      if (cont < 0x80 || cont > 0xBF) {
        throw std::logic_error("not a UTF-8 string");
      }
      uni = (uni << 6) | (cont & 0x3F);
    }

    // Reject overlong encodings (e.g. C0 80 for U+0000): they are not
    // well-formed UTF-8.
    if (uni < min_value) throw std::logic_error("not a UTF-8 string");
    // Surrogate code points must not appear in UTF-8.
    if (uni >= 0xD800 && uni <= 0xDFFF) {
      throw std::logic_error("not a UTF-8 string");
    }
    if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");

    if (uni <= 0xFFFF) {
      utf16 += static_cast<wchar_t>(uni);
    } else {
      // Encode as a surrogate pair.
      uni -= 0x10000;
      utf16 += static_cast<wchar_t>((uni >> 10) + 0xD800);
      utf16 += static_cast<wchar_t>((uni & 0x3FF) + 0xDC00);
    }
  }
  return utf16;
}
// Called by the system to obtain the cursor to display while the user drags
// the minimized window; the dialog's icon handle is returned for that purpose.
HCURSOR CNonStreamingTextToSpeechDlg::OnQueryDragIcon()
{
  const HCURSOR dragCursor = static_cast<HCURSOR>(m_hIcon);
  return dragCursor;
}
// Appends the UTF-8 string `s` after the text currently held by the edit
// control `e`.
void AppendTextToEditCtrl(CEdit& e, const std::string &s) {
  // Collapse the selection to an empty range at the very end of the text, so
  // that ReplaceSel() below inserts there instead of overwriting anything.
  const int endOfText = e.GetWindowTextLength();
  e.SetSel(endOfText, endOfText);

  // The edit control expects UTF-16 text.
  const std::wstring wide = Utf8ToUtf16(s);
  e.ReplaceSel(wide.c_str());
}
// Appends `s` to the multiline edit control `e`, preceded by a "\r\n" line
// break.
void AppendLineToMultilineEditCtrl(CEdit& e, const std::string &s) {
  std::string line = "\r\n";
  line += s;
  AppendTextToEditCtrl(e, line);
}
// Constructs the main dialog from the IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG
// template and loads the application icon.
//
// tts_ is explicitly set to nullptr: Init() returns early without assigning
// it when the model files are missing, and the destructor tests the pointer
// before releasing it, so it must never hold an indeterminate value.
//
// @param pParent  Parent window of the dialog; may be nullptr.
CNonStreamingTextToSpeechDlg::CNonStreamingTextToSpeechDlg(CWnd* pParent /*=nullptr*/)
  : CDialogEx(IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG, pParent),
    tts_(nullptr)
{
  m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
}
// DDX/DDV support: binds the dialog's control members to the controls of the
// dialog template, identified by their resource ids.
void CNonStreamingTextToSpeechDlg::DoDataExchange(CDataExchange* pDX)
{
CDialogEx::DoDataExchange(pDX);
// Hint / status text area.
DDX_Control(pDX, IDC_HINT, my_hint_);
// Speaker id input.
DDX_Control(pDX, IDC_SPEAKER, speaker_id_);
// Speech speed input.
DDX_Control(pDX, IDC_SPEED, speed_);
// The IDOK button starts the generation (see OnBnClickedOk).
DDX_Control(pDX, IDOK, generate_btn_);
// Text to be synthesized.
DDX_Control(pDX, IDC_TEXT, my_text_);
}
// Message map of the main dialog: system commands, painting, the drag-icon
// query, and clicks on the IDOK button (handled by OnBnClickedOk).
BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechDlg, CDialogEx)
ON_WM_SYSCOMMAND()
ON_WM_PAINT()
ON_WM_QUERYDRAGICON()
ON_BN_CLICKED(IDOK, &CNonStreamingTextToSpeechDlg::OnBnClickedOk)
END_MESSAGE_MAP()
// CNonStreamingTextToSpeechDlg message handlers
// Dialog initialization: adds the "About..." entry to the system menu, sets
// the dialog icons and runs the application-specific Init().
// Returns TRUE, i.e. the focus is not set to a control here.
BOOL CNonStreamingTextToSpeechDlg::OnInitDialog()
{
  CDialogEx::OnInitDialog();

  // The "About..." entry lives in the system menu, so IDM_ABOUTBOX must be
  // in the system command range.
  ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
  ASSERT(IDM_ABOUTBOX < 0xF000);

  if (CMenu* systemMenu = GetSystemMenu(FALSE))
  {
    CString aboutLabel;
    BOOL labelLoaded;
    labelLoaded = aboutLabel.LoadString(IDS_ABOUTBOX);
    ASSERT(labelLoaded);
    if (!aboutLabel.IsEmpty())
    {
      systemMenu->AppendMenu(MF_SEPARATOR);
      systemMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, aboutLabel);
    }
  }

  // The framework sets the icon automatically only when the application's
  // main window is not a dialog, so it is done by hand here.
  SetIcon(m_hIcon, TRUE);   // big icon
  SetIcon(m_hIcon, FALSE);  // small icon

  // Application-specific initialization.
  Init();

  return TRUE;  // return TRUE unless you set the focus to a control
}
// Handles system menu commands: IDM_ABOUTBOX opens the About dialog, every
// other command is passed on to the base class.
void CNonStreamingTextToSpeechDlg::OnSysCommand(UINT nID, LPARAM lParam)
{
  const bool isAboutCommand = (nID & 0xFFF0) == IDM_ABOUTBOX;
  if (!isAboutCommand)
  {
    CDialogEx::OnSysCommand(nID, lParam);
    return;
  }

  CAboutDlg aboutDialog;
  aboutDialog.DoModal();
}
// If you add a minimize button to your dialog, you will need the code below
// to draw the icon . For MFC applications using the document/view model,
// this is automatically done for you by the framework.
void CNonStreamingTextToSpeechDlg::OnPaint()
{
if (IsIconic())
{
CPaintDC dc(this); // device context for painting
SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()), 0);
// Center icon in client rectangle
int cxIcon = GetSystemMetrics(SM_CXICON);
int cyIcon = GetSystemMetrics(SM_CYICON);
CRect rect;
GetClientRect(&rect);
int x = (rect.Width() - cxIcon + 1) / 2;
int y = (rect.Height() - cyIcon + 1) / 2;
// Draw the icon
dc.DrawIcon(x, y, m_hIcon);
}
else
{
CDialogEx::OnPaint();
}
}
// Returns true if `filename` can be opened for reading, false otherwise.
bool Exists(const std::string &filename) {
  return std::ifstream(filename).good();
}
// Fills the hint area with usage instructions for the speaker id, the speed
// and the text input.
void CNonStreamingTextToSpeechDlg::InitHint() {
  static const char *const kHintLines[] = {
      "Speaker ID: Used only for multi-speaker models. Example value: 0",
      "Speed: Larger -> Faster in speech speed. Example value: 1.0",
      "\r\n\r\nPlease input your text and click the button Generate",
  };

  for (const char *line : kHintLines) {
    AppendLineToMultilineEditCtrl(my_hint_, line);
  }
}
// Application-specific dialog initialization.
//
// Writes the usage hints, pre-fills the speaker id ("0") and speed ("1.0")
// inputs, and checks that ./model.onnx, ./lexicon.txt and ./tokens.txt exist.
// If any of them is missing, the Generate button is disabled, download
// instructions are appended to the hint area, and no TTS engine is created.
// Otherwise the engine is created from those files and stored in tts_.
void CNonStreamingTextToSpeechDlg::Init() {
  InitHint();
  speaker_id_.SetWindowText(Utf8ToUtf16("0").c_str());
  speed_.SetWindowText(Utf8ToUtf16("1.0").c_str());

  // Files that have to be present in the working directory.
  static const char *const kRequiredFiles[] = {
      "./model.onnx",
      "./lexicon.txt",
      "./tokens.txt",
  };

  bool ok = true;
  // The separator gets its own line so that it does not run into the first
  // "Cannot find ..." message.
  std::string error_message = "--------------------\r\n";
  for (const char *file : kRequiredFiles) {
    if (!Exists(file)) {
      error_message += "Cannot find ";
      error_message += file;
      error_message += "\r\n";
      ok = false;
    }
  }

  if (!ok) {
    generate_btn_.EnableWindow(FALSE);
    error_message +=
        "\r\nPlease refer to\r\n"
        "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html";
    error_message += "\r\nto download models.\r\n";
    error_message += "\r\nWe give an example below\r\n\r\n";
    error_message +=
        "wget -O model.onnx "
        "https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/"
        "vits-aishell3.onnx\r\n";
    error_message +=
        "wget "
        "https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/"
        "lexicon.txt\r\n";
    error_message +=
        "wget "
        "https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/"
        "tokens.txt\r\n";
    AppendLineToMultilineEditCtrl(my_hint_, error_message);
    return;
  }

  // Now init tts
  SherpaOnnxOfflineTtsConfig config;
  memset(&config, 0, sizeof(config));
  config.model.debug = 0;
  config.model.num_threads = 1;
  config.model.provider = "cpu";
  config.model.vits.model = "./model.onnx";
  config.model.vits.lexicon = "./lexicon.txt";
  config.model.vits.tokens = "./tokens.txt";

  tts_ = SherpaOnnxCreateOfflineTts(&config);

  // Defensive: do not leave the Generate button enabled without an engine.
  // NOTE(review): whether SherpaOnnxCreateOfflineTts can return nullptr is
  // not visible from this file - confirm against the C API.
  if (tts_ == nullptr) {
    generate_btn_.EnableWindow(FALSE);
    AppendLineToMultilineEditCtrl(
        my_hint_,
        "Failed to create the text-to-speech engine from ./model.onnx, "
        "./lexicon.txt and ./tokens.txt");
  }
}
// Releases the TTS engine held in tts_, if there is one.
CNonStreamingTextToSpeechDlg::~CNonStreamingTextToSpeechDlg() {
  if (!tts_) {
    return;
  }
  SherpaOnnxDestroyOfflineTts(tts_);
}
void CNonStreamingTextToSpeechDlg::OnBnClickedOk() {
// TODO: Add your control notification handler code here
CString s;
speaker_id_.GetWindowText(s);
int speaker_id = _ttoi(s);
if (speaker_id < 0) {
AfxMessageBox(Utf8ToUtf16("Please input a valid speaker ID").c_str(), MB_OK);
return;
}
speed_.GetWindowText(s);
float speed = _ttof(s);
if (speed < 0) {
AfxMessageBox(Utf8ToUtf16("Please input a valid speed").c_str(), MB_OK);
return;
}
my_text_.GetWindowText(s);
CT2CA pszConvertedAnsiString(s);
std::string ss(pszConvertedAnsiString);
if (ss.empty()) {
AfxMessageBox(Utf8ToUtf16("Please input your text").c_str(), MB_OK);
return;
}
const SherpaOnnxGeneratedAudio *audio =
SherpaOnnxOfflineTtsGenerate(tts_, ss.c_str(), speaker_id, speed);
std::string filename = "./generated.wav";
int ok = SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
filename.c_str());
SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
if (ok) {
AfxMessageBox(Utf8ToUtf16("Saved to ./generated.wav successfully").c_str(), MB_OK);
} else {
AfxMessageBox(Utf8ToUtf16("Failed to save to ./generated.wav").c_str(), MB_OK);
}
//CDialogEx::OnOK();
}
... ...
// NonStreamingTextToSpeechDlg.h : header file
//
#pragma once
#include "sherpa-onnx/c-api/c-api.h"
// CNonStreamingTextToSpeechDlg dialog
// Main dialog of the non-streaming text-to-speech example. It collects a
// speaker id, a speed and a text from the user and writes the synthesized
// speech to a wave file (see OnBnClickedOk).
class CNonStreamingTextToSpeechDlg : public CDialogEx
{
// Construction
public:
CNonStreamingTextToSpeechDlg(CWnd* pParent = nullptr); // standard constructor
// Releases the TTS engine held in tts_, if any.
~CNonStreamingTextToSpeechDlg();
// Dialog Data
#ifdef AFX_DESIGN_TIME
enum { IDD = IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG };
#endif
protected:
virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
// Implementation
protected:
// Application icon, loaded in the constructor from IDR_MAINFRAME.
HICON m_hIcon;
// Generated message map functions
virtual BOOL OnInitDialog();
afx_msg void OnSysCommand(UINT nID, LPARAM lParam);
afx_msg void OnPaint();
afx_msg HCURSOR OnQueryDragIcon();
DECLARE_MESSAGE_MAP()
public:
// Hint / status text area (IDC_HINT).
CEdit my_hint_;
// Speaker id input (IDC_SPEAKER).
CEdit speaker_id_;
// Speech speed input (IDC_SPEED).
CEdit speed_;
// Fills in hints and default values, checks for the model files and creates
// the TTS engine; called from OnInitDialog().
void Init();
// Writes the usage hints into my_hint_.
void InitHint();
// The Generate button (IDOK).
CButton generate_btn_;
// Click handler of the Generate button.
afx_msg void OnBnClickedOk();
// Handle of the TTS engine; assigned in Init() via
// SherpaOnnxCreateOfflineTts() and released in the destructor.
SherpaOnnxOfflineTts *tts_;
// Input area for the text to synthesize (IDC_TEXT).
CEdit my_text_;
};
... ...
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by NonStreamingTextToSpeech.rc
//
#define IDM_ABOUTBOX 0x0010
#define IDD_ABOUTBOX 100
#define IDS_ABOUTBOX 101
#define IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG 102
#define IDR_MAINFRAME 128
#define IDC_SPEAKER 1000
#define IDC_SPEED 1003
#define IDC_TEXT 1004
#define IDC_HINT 1005
// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 130
#define _APS_NEXT_COMMAND_VALUE 32771
#define _APS_NEXT_CONTROL_VALUE 1006
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif
... ...
#pragma once
#ifndef VC_EXTRALEAN
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
#endif
#include "targetver.h"
#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be explicit
// turns off MFC's hiding of some common and often safely ignored warning messages
#define _AFX_ALL_WARNINGS
#include <afxwin.h> // MFC core and standard components
#include <afxext.h> // MFC extensions
#ifndef _AFX_NO_OLE_SUPPORT
#include <afxdtctl.h> // MFC support for Internet Explorer 4 Common Controls
#endif
#ifndef _AFX_NO_AFXCMN_SUPPORT
#include <afxcmn.h> // MFC support for Windows Common Controls
#endif // _AFX_NO_AFXCMN_SUPPORT
#include <afxcontrolbars.h> // MFC support for ribbons and control bars
... ...
// pch.cpp: source file corresponding to the pre-compiled header
#include "pch.h"
// When you are using pre-compiled headers, this source file is necessary for compilation to succeed.
... ...
// pch.h: This is a precompiled header file.
// Files listed below are compiled only once, improving build performance for future builds.
// This also affects IntelliSense performance, including code completion and many code browsing features.
// However, files listed here are ALL re-compiled if any one of them is updated between builds.
// Do not add files here that you will be updating frequently as this negates the performance advantage.
#ifndef PCH_H
#define PCH_H
// add headers that you want to pre-compile here
#include "framework.h"
#endif //PCH_H
... ...
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
<SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
<SherpaOnnxLibraries>
sherpa-onnx-portaudio_static.lib;
sherpa-onnx-c-api.lib;
sherpa-onnx-core.lib;
kaldi-decoder-core.lib;
sherpa-onnx-kaldifst-core.lib;
sherpa-onnx-fst.lib;
kaldi-native-fbank-core.lib;
absl_base.lib;
absl_city.lib;
absl_hash.lib;
absl_low_level_hash.lib;
absl_raw_hash_set.lib;
absl_raw_logging_internal.lib;
absl_throw_delegate.lib;
clog.lib;
cpuinfo.lib;
flatbuffers.lib;
libprotobuf-lite.lib;
onnx.lib;
onnx_proto.lib;
onnxruntime_common.lib;
onnxruntime_flatbuffers.lib;
onnxruntime_framework.lib;
onnxruntime_graph.lib;
onnxruntime_mlas.lib;
onnxruntime_optimizer.lib;
onnxruntime_providers.lib;
onnxruntime_session.lib;
onnxruntime_util.lib;
re2.lib;
</SherpaOnnxLibraries>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>
$(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
$(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup />
</Project>
... ...
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>
... ...
... ... @@ -7,6 +7,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingTextToSpeech", "NonStreamingTextToSpeech\NonStreamingTextToSpeech.vcxproj", "{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
... ... @@ -31,6 +33,14 @@ Global
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x64.ActiveCfg = Debug|x64
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x64.Build.0 = Debug|x64
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x86.ActiveCfg = Debug|Win32
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x86.Build.0 = Debug|Win32
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x64.ActiveCfg = Release|x64
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x64.Build.0 = Release|x64
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x86.ActiveCfg = Release|Win32
{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
... ...
... ... @@ -572,11 +572,11 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
return tts
}
func (tts *OfflineTts) Generate(text string, sid int) *GeneratedAudio {
func (tts *OfflineTts) Generate(text string, sid int, speed float32) *GeneratedAudio {
s := C.CString(text)
defer C.free(unsafe.Pointer(s))
audio := C.SherpaOnnxOfflineTtsGenerate(tts.impl, s, C.int(sid))
audio := C.SherpaOnnxOfflineTtsGenerate(tts.impl, s, C.int(sid), C.float(speed))
defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
ans := &GeneratedAudio{}
... ...
... ... @@ -568,8 +568,9 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; }
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) {
sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid);
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
float speed) {
sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid, speed);
if (audio.samples.empty()) {
return nullptr;
... ...
... ... @@ -639,7 +639,8 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
// The user has to use DestroyOfflineTtsGeneratedAudio() to free the returned
// pointer to avoid memory leak.
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid);
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
float speed);
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
const SherpaOnnxGeneratedAudio *p);
... ...
... ... @@ -18,8 +18,8 @@ class OfflineTtsImpl {
static std::unique_ptr<OfflineTtsImpl> Create(const OfflineTtsConfig &config);
virtual GeneratedAudio Generate(const std::string &text,
int64_t sid = 0) const = 0;
virtual GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
float speed = 1.0) const = 0;
};
} // namespace sherpa_onnx
... ...
... ... @@ -24,8 +24,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
model_->Punctuations(), model_->Language(),
config.model.debug) {}
GeneratedAudio Generate(const std::string &text,
int64_t sid = 0) const override {
GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
float speed = 1.0) const override {
int32_t num_speakers = model_->NumSpeakers();
if (num_speakers == 0 && sid != 0) {
SHERPA_ONNX_LOGE(
... ... @@ -66,7 +66,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
Ort::Value x_tensor = Ort::Value::CreateTensor(
memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());
Ort::Value audio = model_->Run(std::move(x_tensor), sid);
Ort::Value audio = model_->Run(std::move(x_tensor), sid, speed);
std::vector<int64_t> audio_shape =
audio.GetTensorTypeAndShapeInfo().GetShape();
... ...
... ... @@ -17,7 +17,7 @@ void OfflineTtsVitsModelConfig::Register(ParseOptions *po) {
po->Register("vits-noise-scale-w", &noise_scale_w,
"noise_scale_w for VITS models");
po->Register("vits-length-scale", &length_scale,
"length_scale for VITS models");
"Speech speed. Larger->Slower; Smaller->faster.");
}
bool OfflineTtsVitsModelConfig::Validate() const {
... ...
... ... @@ -26,7 +26,7 @@ class OfflineTtsVitsModel::Impl {
Init(buf.data(), buf.size());
}
Ort::Value Run(Ort::Value x, int64_t sid) {
Ort::Value Run(Ort::Value x, int64_t sid, float speed) {
auto memory_info =
Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
... ... @@ -48,6 +48,10 @@ class OfflineTtsVitsModel::Impl {
float length_scale = config_.vits.length_scale;
float noise_scale_w = config_.vits.noise_scale_w;
if (speed != 1 && speed > 0) {
length_scale = 1. / speed;
}
Ort::Value noise_scale_tensor =
Ort::Value::CreateTensor(memory_info, &noise_scale, 1, &scale_shape, 1);
... ... @@ -139,8 +143,9 @@ OfflineTtsVitsModel::OfflineTtsVitsModel(const OfflineTtsModelConfig &config)
OfflineTtsVitsModel::~OfflineTtsVitsModel() = default;
Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/) {
return impl_->Run(std::move(x), sid);
Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/,
float speed /*= 1.0*/) {
return impl_->Run(std::move(x), sid, speed);
}
int32_t OfflineTtsVitsModel::SampleRate() const { return impl_->SampleRate(); }
... ...
... ... @@ -29,7 +29,7 @@ class OfflineTtsVitsModel {
* @return Return a float32 tensor containing audio samples. You can flatten
* it to a 1-D tensor.
*/
Ort::Value Run(Ort::Value x, int64_t sid = 0);
Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0);
// Sample rate of the generated audio
int32_t SampleRate() const;
... ...
... ... @@ -28,9 +28,9 @@ OfflineTts::OfflineTts(const OfflineTtsConfig &config)
OfflineTts::~OfflineTts() = default;
GeneratedAudio OfflineTts::Generate(const std::string &text,
int64_t sid /*=0*/) const {
return impl_->Generate(text, sid);
GeneratedAudio OfflineTts::Generate(const std::string &text, int64_t sid /*=0*/,
float speed /*= 1.0*/) const {
return impl_->Generate(text, sid, speed);
}
} // namespace sherpa_onnx
... ...
... ... @@ -43,7 +43,8 @@ class OfflineTts {
// trained using the VCTK dataset. It is not used for
// single-speaker models, e.g., models trained using the ljspeech
// dataset.
GeneratedAudio Generate(const std::string &text, int64_t sid = 0) const;
GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
float speed = 1.0) const;
private:
std::unique_ptr<OfflineTtsImpl> impl_;
... ...
... ... @@ -40,7 +40,8 @@ void PybindOfflineTts(py::module *m) {
using PyClass = OfflineTts;
py::class_<PyClass>(*m, "OfflineTts")
.def(py::init<const OfflineTtsConfig &>(), py::arg("config"))
.def("generate", &PyClass::Generate, py::arg("text"), py::arg("sid") = 0);
.def("generate", &PyClass::Generate, py::arg("text"), py::arg("sid") = 0,
py::arg("speed") = 1.0);
}
} // namespace sherpa_onnx
... ...