Fangjun Kuang
Committed by GitHub

Add MFC TTS example on Windows (#378)

正在显示 29 个修改的文件 包含 994 行增加22 行删除
@@ -186,7 +186,7 @@ int32_t main(int32_t argc, char *argv[]) { @@ -186,7 +186,7 @@ int32_t main(int32_t argc, char *argv[]) {
186 SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config); 186 SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&config);
187 187
188 const SherpaOnnxGeneratedAudio *audio = 188 const SherpaOnnxGeneratedAudio *audio =
189 - SherpaOnnxOfflineTtsGenerate(tts, text, sid); 189 + SherpaOnnxOfflineTtsGenerate(tts, text, sid, 1.0);
190 190
191 SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename); 191 SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate, filename);
192 192
@@ -49,7 +49,7 @@ func main() { @@ -49,7 +49,7 @@ func main() {
49 49
50 log.Println("Start generating!") 50 log.Println("Start generating!")
51 51
52 - audio := tts.Generate(text, sid) 52 + audio := tts.Generate(text, sid, 1.0)
53 53
54 log.Println("Done!") 54 log.Println("Done!")
55 55
  1 +
  2 +// NonStreamingTextToSpeech.cpp : Defines the class behaviors for the application.
  3 +//
  4 +
  5 +#include "pch.h"
  6 +#include "framework.h"
  7 +#include "NonStreamingTextToSpeech.h"
  8 +#include "NonStreamingTextToSpeechDlg.h"
  9 +
  10 +#ifdef _DEBUG
  11 +#define new DEBUG_NEW
  12 +#endif
  13 +
  14 +
  15 +// CNonStreamingTextToSpeechApp
  16 +
  17 +BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechApp, CWinApp)
  18 + ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
  19 +END_MESSAGE_MAP()
  20 +
  21 +
  22 +// CNonStreamingTextToSpeechApp construction
  23 +
  24 +CNonStreamingTextToSpeechApp::CNonStreamingTextToSpeechApp()
  25 +{
  26 + // TODO: add construction code here,
  27 + // Place all significant initialization in InitInstance
  28 +}
  29 +
  30 +
  31 +// The one and only CNonStreamingTextToSpeechApp object
  32 +
  33 +CNonStreamingTextToSpeechApp theApp;
  34 +
  35 +
  36 +// CNonStreamingTextToSpeechApp initialization
  37 +
  38 +BOOL CNonStreamingTextToSpeechApp::InitInstance()
  39 +{
  40 + CWinApp::InitInstance();
  41 +
  42 +
  43 + // Create the shell manager, in case the dialog contains
  44 + // any shell tree view or shell list view controls.
  45 + CShellManager *pShellManager = new CShellManager;
  46 +
  47 + // Activate "Windows Native" visual manager for enabling themes in MFC controls
  48 + CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));
  49 +
  50 + // Standard initialization
  51 + // If you are not using these features and wish to reduce the size
  52 + // of your final executable, you should remove from the following
  53 + // the specific initialization routines you do not need
  54 + // Change the registry key under which our settings are stored
  55 + // TODO: You should modify this string to be something appropriate
  56 + // such as the name of your company or organization
  57 + SetRegistryKey(_T("Local AppWizard-Generated Applications"));
  58 +
  59 + CNonStreamingTextToSpeechDlg dlg;
  60 + m_pMainWnd = &dlg;
  61 + INT_PTR nResponse = dlg.DoModal();
  62 + if (nResponse == IDOK)
  63 + {
  64 + // TODO: Place code here to handle when the dialog is
  65 + // dismissed with OK
  66 + }
  67 + else if (nResponse == IDCANCEL)
  68 + {
  69 + // TODO: Place code here to handle when the dialog is
  70 + // dismissed with Cancel
  71 + }
  72 + else if (nResponse == -1)
  73 + {
  74 + TRACE(traceAppMsg, 0, "Warning: dialog creation failed, so application is terminating unexpectedly.\n");
  75 + TRACE(traceAppMsg, 0, "Warning: if you are using MFC controls on the dialog, you cannot #define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
  76 + }
  77 +
  78 + // Delete the shell manager created above.
  79 + if (pShellManager != nullptr)
  80 + {
  81 + delete pShellManager;
  82 + }
  83 +
  84 +#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  85 + ControlBarCleanUp();
  86 +#endif
  87 +
  88 + // Since the dialog has been closed, return FALSE so that we exit the
  89 + // application, rather than start the application's message pump.
  90 + return FALSE;
  91 +}
  92 +
  1 +
  2 +// NonStreamingTextToSpeech.h : main header file for the PROJECT_NAME application
  3 +//
  4 +
  5 +#pragma once
  6 +
  7 +#ifndef __AFXWIN_H__
  8 + #error "include 'pch.h' before including this file for PCH"
  9 +#endif
  10 +
  11 +#include "resource.h" // main symbols
  12 +
  13 +
  14 +// CNonStreamingTextToSpeechApp:
  15 +// See NonStreamingTextToSpeech.cpp for the implementation of this class
  16 +//
  17 +
  18 +class CNonStreamingTextToSpeechApp : public CWinApp
  19 +{
  20 +public:
  21 + CNonStreamingTextToSpeechApp();
  22 +
  23 +// Overrides
  24 +public:
  25 + virtual BOOL InitInstance();
  26 +
  27 +// Implementation
  28 +
  29 + DECLARE_MESSAGE_MAP()
  30 +};
  31 +
  32 +extern CNonStreamingTextToSpeechApp theApp;
1 B// Microsoft Visual C++ generated resource script. 1 B// Microsoft Visual C++ generated resource script.
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 + <ItemGroup Label="ProjectConfigurations">
  4 + <ProjectConfiguration Include="Debug|Win32">
  5 + <Configuration>Debug</Configuration>
  6 + <Platform>Win32</Platform>
  7 + </ProjectConfiguration>
  8 + <ProjectConfiguration Include="Release|Win32">
  9 + <Configuration>Release</Configuration>
  10 + <Platform>Win32</Platform>
  11 + </ProjectConfiguration>
  12 + <ProjectConfiguration Include="Debug|x64">
  13 + <Configuration>Debug</Configuration>
  14 + <Platform>x64</Platform>
  15 + </ProjectConfiguration>
  16 + <ProjectConfiguration Include="Release|x64">
  17 + <Configuration>Release</Configuration>
  18 + <Platform>x64</Platform>
  19 + </ProjectConfiguration>
  20 + </ItemGroup>
  21 + <PropertyGroup Label="Globals">
  22 + <VCProjectVersion>17.0</VCProjectVersion>
  23 + <ProjectGuid>{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}</ProjectGuid>
  24 + <Keyword>MFCProj</Keyword>
  25 + <RootNamespace>NonStreamingTextToSpeech</RootNamespace>
  26 + <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  27 + </PropertyGroup>
  28 + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  29 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
  30 + <ConfigurationType>Application</ConfigurationType>
  31 + <UseDebugLibraries>true</UseDebugLibraries>
  32 + <PlatformToolset>v143</PlatformToolset>
  33 + <CharacterSet>Unicode</CharacterSet>
  34 + <UseOfMfc>Dynamic</UseOfMfc>
  35 + </PropertyGroup>
  36 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
  37 + <ConfigurationType>Application</ConfigurationType>
  38 + <UseDebugLibraries>false</UseDebugLibraries>
  39 + <PlatformToolset>v143</PlatformToolset>
  40 + <WholeProgramOptimization>true</WholeProgramOptimization>
  41 + <CharacterSet>Unicode</CharacterSet>
  42 + <UseOfMfc>Dynamic</UseOfMfc>
  43 + </PropertyGroup>
  44 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
  45 + <ConfigurationType>Application</ConfigurationType>
  46 + <UseDebugLibraries>true</UseDebugLibraries>
  47 + <PlatformToolset>v143</PlatformToolset>
  48 + <CharacterSet>Unicode</CharacterSet>
  49 + <UseOfMfc>Dynamic</UseOfMfc>
  50 + </PropertyGroup>
  51 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
  52 + <ConfigurationType>Application</ConfigurationType>
  53 + <UseDebugLibraries>false</UseDebugLibraries>
  54 + <PlatformToolset>v143</PlatformToolset>
  55 + <WholeProgramOptimization>true</WholeProgramOptimization>
  56 + <CharacterSet>Unicode</CharacterSet>
  57 + <UseOfMfc>Static</UseOfMfc>
  58 + </PropertyGroup>
  59 + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  60 + <ImportGroup Label="ExtensionSettings">
  61 + </ImportGroup>
  62 + <ImportGroup Label="Shared">
  63 + </ImportGroup>
  64 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
  65 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  66 + <Import Project="sherpa-onnx-deps.props" />
  67 + </ImportGroup>
  68 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
  69 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  70 + <Import Project="sherpa-onnx-deps.props" />
  71 + </ImportGroup>
  72 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
  73 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  74 + <Import Project="sherpa-onnx-deps.props" />
  75 + </ImportGroup>
  76 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  77 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  78 + <Import Project="sherpa-onnx-deps.props" />
  79 + </ImportGroup>
  80 + <PropertyGroup Label="UserMacros" />
  81 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  82 + <LinkIncremental>false</LinkIncremental>
  83 + </PropertyGroup>
  84 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
  85 + <LinkIncremental>true</LinkIncremental>
  86 + </PropertyGroup>
  87 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
  88 + <LinkIncremental>true</LinkIncremental>
  89 + </PropertyGroup>
  90 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
  91 + <LinkIncremental>false</LinkIncremental>
  92 + </PropertyGroup>
  93 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  94 + <ClCompile>
  95 + <PrecompiledHeader>Use</PrecompiledHeader>
  96 + <WarningLevel>Level3</WarningLevel>
  97 + <FunctionLevelLinking>true</FunctionLevelLinking>
  98 + <IntrinsicFunctions>true</IntrinsicFunctions>
  99 + <SDLCheck>true</SDLCheck>
  100 + <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  101 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  102 + </ClCompile>
  103 + <Link>
  104 + <SubSystem>Windows</SubSystem>
  105 + <EnableCOMDATFolding>true</EnableCOMDATFolding>
  106 + <OptimizeReferences>true</OptimizeReferences>
  107 + </Link>
  108 + <Midl>
  109 + <MkTypLibCompatible>false</MkTypLibCompatible>
  110 + <ValidateAllParameters>true</ValidateAllParameters>
  111 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  112 + </Midl>
  113 + <ResourceCompile>
  114 + <Culture>0x0409</Culture>
  115 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  116 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  117 + </ResourceCompile>
  118 + </ItemDefinitionGroup>
  119 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
  120 + <ClCompile>
  121 + <PrecompiledHeader>Use</PrecompiledHeader>
  122 + <WarningLevel>Level3</WarningLevel>
  123 + <SDLCheck>true</SDLCheck>
  124 + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  125 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  126 + </ClCompile>
  127 + <Link>
  128 + <SubSystem>Windows</SubSystem>
  129 + </Link>
  130 + <Midl>
  131 + <MkTypLibCompatible>false</MkTypLibCompatible>
  132 + <ValidateAllParameters>true</ValidateAllParameters>
  133 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  134 + </Midl>
  135 + <ResourceCompile>
  136 + <Culture>0x0409</Culture>
  137 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  138 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  139 + </ResourceCompile>
  140 + </ItemDefinitionGroup>
  141 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
  142 + <ClCompile>
  143 + <PrecompiledHeader>Use</PrecompiledHeader>
  144 + <WarningLevel>Level3</WarningLevel>
  145 + <SDLCheck>true</SDLCheck>
  146 + <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  147 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  148 + </ClCompile>
  149 + <Link>
  150 + <SubSystem>Windows</SubSystem>
  151 + </Link>
  152 + <Midl>
  153 + <MkTypLibCompatible>false</MkTypLibCompatible>
  154 + <ValidateAllParameters>true</ValidateAllParameters>
  155 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  156 + </Midl>
  157 + <ResourceCompile>
  158 + <Culture>0x0409</Culture>
  159 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  160 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  161 + </ResourceCompile>
  162 + </ItemDefinitionGroup>
  163 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
  164 + <ClCompile>
  165 + <PrecompiledHeader>Use</PrecompiledHeader>
  166 + <WarningLevel>Level3</WarningLevel>
  167 + <FunctionLevelLinking>true</FunctionLevelLinking>
  168 + <IntrinsicFunctions>true</IntrinsicFunctions>
  169 + <SDLCheck>true</SDLCheck>
  170 + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  171 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  172 + </ClCompile>
  173 + <Link>
  174 + <SubSystem>Windows</SubSystem>
  175 + <EnableCOMDATFolding>true</EnableCOMDATFolding>
  176 + <OptimizeReferences>true</OptimizeReferences>
  177 + </Link>
  178 + <Midl>
  179 + <MkTypLibCompatible>false</MkTypLibCompatible>
  180 + <ValidateAllParameters>true</ValidateAllParameters>
  181 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  182 + </Midl>
  183 + <ResourceCompile>
  184 + <Culture>0x0409</Culture>
  185 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  186 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  187 + </ResourceCompile>
  188 + </ItemDefinitionGroup>
  189 + <ItemGroup>
  190 + <ClInclude Include="framework.h" />
  191 + <ClInclude Include="NonStreamingTextToSpeech.h" />
  192 + <ClInclude Include="NonStreamingTextToSpeechDlg.h" />
  193 + <ClInclude Include="pch.h" />
  194 + <ClInclude Include="Resource.h" />
  195 + <ClInclude Include="targetver.h" />
  196 + </ItemGroup>
  197 + <ItemGroup>
  198 + <ClCompile Include="NonStreamingTextToSpeech.cpp" />
  199 + <ClCompile Include="NonStreamingTextToSpeechDlg.cpp" />
  200 + <ClCompile Include="pch.cpp">
  201 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
  202 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
  203 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
  204 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
  205 + </ClCompile>
  206 + </ItemGroup>
  207 + <ItemGroup>
  208 + <ResourceCompile Include="NonStreamingTextToSpeech.rc" />
  209 + </ItemGroup>
  210 + <ItemGroup>
  211 + <None Include="res\NonStreamingTextToSpeech.rc2" />
  212 + </ItemGroup>
  213 + <ItemGroup>
  214 + <Image Include="res\NonStreamingTextToSpeech.ico" />
  215 + </ItemGroup>
  216 + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  217 + <ImportGroup Label="ExtensionTargets">
  218 + </ImportGroup>
  219 +</Project>
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 + <ItemGroup>
  4 + <Filter Include="Source Files">
  5 + <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
  6 + <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
  7 + </Filter>
  8 + <Filter Include="Header Files">
  9 + <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
  10 + <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
  11 + </Filter>
  12 + <Filter Include="Resource Files">
  13 + <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
  14 + <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
  15 + </Filter>
  16 + </ItemGroup>
  17 + <ItemGroup>
  18 + <ClInclude Include="NonStreamingTextToSpeech.h">
  19 + <Filter>Header Files</Filter>
  20 + </ClInclude>
  21 + <ClInclude Include="NonStreamingTextToSpeechDlg.h">
  22 + <Filter>Header Files</Filter>
  23 + </ClInclude>
  24 + <ClInclude Include="framework.h">
  25 + <Filter>Header Files</Filter>
  26 + </ClInclude>
  27 + <ClInclude Include="targetver.h">
  28 + <Filter>Header Files</Filter>
  29 + </ClInclude>
  30 + <ClInclude Include="Resource.h">
  31 + <Filter>Header Files</Filter>
  32 + </ClInclude>
  33 + <ClInclude Include="pch.h">
  34 + <Filter>Header Files</Filter>
  35 + </ClInclude>
  36 + </ItemGroup>
  37 + <ItemGroup>
  38 + <ClCompile Include="NonStreamingTextToSpeech.cpp">
  39 + <Filter>Source Files</Filter>
  40 + </ClCompile>
  41 + <ClCompile Include="NonStreamingTextToSpeechDlg.cpp">
  42 + <Filter>Source Files</Filter>
  43 + </ClCompile>
  44 + <ClCompile Include="pch.cpp">
  45 + <Filter>Source Files</Filter>
  46 + </ClCompile>
  47 + </ItemGroup>
  48 + <ItemGroup>
  49 + <ResourceCompile Include="NonStreamingTextToSpeech.rc">
  50 + <Filter>Resource Files</Filter>
  51 + </ResourceCompile>
  52 + </ItemGroup>
  53 + <ItemGroup>
  54 + <None Include="res\NonStreamingTextToSpeech.rc2">
  55 + <Filter>Resource Files</Filter>
  56 + </None>
  57 + </ItemGroup>
  58 + <ItemGroup>
  59 + <Image Include="res\NonStreamingTextToSpeech.ico">
  60 + <Filter>Resource Files</Filter>
  61 + </Image>
  62 + </ItemGroup>
  63 +</Project>
  1 +
  2 +// NonStreamingTextToSpeechDlg.cpp : implementation file
  3 +//
  4 +
  5 +#include "pch.h"
  6 +#include "framework.h"
  7 +#include "NonStreamingTextToSpeech.h"
  8 +#include "NonStreamingTextToSpeechDlg.h"
  9 +#include "afxdialogex.h"
  10 +
  11 +#include <fstream>
  12 +#include <stdexcept>
  13 +#include <string>
  14 +#include <vector>
  15 +
  16 +#ifdef _DEBUG
  17 +#define new DEBUG_NEW
  18 +#endif
  19 +
  20 +
  21 +// CAboutDlg dialog used for App About
  22 +
  23 +class CAboutDlg : public CDialogEx
  24 +{
  25 +public:
  26 + CAboutDlg();
  27 +
  28 +// Dialog Data
  29 +#ifdef AFX_DESIGN_TIME
  30 + enum { IDD = IDD_ABOUTBOX };
  31 +#endif
  32 +
  33 + protected:
  34 + virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
  35 +
  36 +// Implementation
  37 +protected:
  38 + DECLARE_MESSAGE_MAP()
  39 +};
  40 +
  41 +CAboutDlg::CAboutDlg() : CDialogEx(IDD_ABOUTBOX)
  42 +{
  43 +}
  44 +
  45 +void CAboutDlg::DoDataExchange(CDataExchange* pDX)
  46 +{
  47 + CDialogEx::DoDataExchange(pDX);
  48 +}
  49 +
  50 +BEGIN_MESSAGE_MAP(CAboutDlg, CDialogEx)
  51 +END_MESSAGE_MAP()
  52 +
  53 +
  54 +// CNonStreamingTextToSpeechDlg dialog
  55 +
  56 +// see
  57 +// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
  58 +static std::wstring Utf8ToUtf16(const std::string &utf8) {
  59 + std::vector<unsigned long> unicode;
  60 + size_t i = 0;
  61 + while (i < utf8.size()) {
  62 + unsigned long uni;
  63 + size_t todo;
  64 + bool error = false;
  65 + unsigned char ch = utf8[i++];
  66 + if (ch <= 0x7F) {
  67 + uni = ch;
  68 + todo = 0;
  69 + } else if (ch <= 0xBF) {
  70 + throw std::logic_error("not a UTF-8 string");
  71 + } else if (ch <= 0xDF) {
  72 + uni = ch & 0x1F;
  73 + todo = 1;
  74 + } else if (ch <= 0xEF) {
  75 + uni = ch & 0x0F;
  76 + todo = 2;
  77 + } else if (ch <= 0xF7) {
  78 + uni = ch & 0x07;
  79 + todo = 3;
  80 + } else {
  81 + throw std::logic_error("not a UTF-8 string");
  82 + }
  83 + for (size_t j = 0; j < todo; ++j) {
  84 + if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
  85 + unsigned char ch = utf8[i++];
  86 + if (ch < 0x80 || ch > 0xBF) throw std::logic_error("not a UTF-8 string");
  87 + uni <<= 6;
  88 + uni += ch & 0x3F;
  89 + }
  90 + if (uni >= 0xD800 && uni <= 0xDFFF)
  91 + throw std::logic_error("not a UTF-8 string");
  92 + if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");
  93 + unicode.push_back(uni);
  94 + }
  95 + std::wstring utf16;
  96 + for (size_t i = 0; i < unicode.size(); ++i) {
  97 + unsigned long uni = unicode[i];
  98 + if (uni <= 0xFFFF) {
  99 + utf16 += (wchar_t)uni;
  100 + } else {
  101 + uni -= 0x10000;
  102 + utf16 += (wchar_t)((uni >> 10) + 0xD800);
  103 + utf16 += (wchar_t)((uni & 0x3FF) + 0xDC00);
  104 + }
  105 + }
  106 + return utf16;
  107 +}
  108 +
  109 +// The system calls this function to obtain the cursor to display while the user drags
  110 +// the minimized window.
  111 +HCURSOR CNonStreamingTextToSpeechDlg::OnQueryDragIcon()
  112 +{
  113 + return static_cast<HCURSOR>(m_hIcon);
  114 +}
  115 +
  116 +
  117 +void AppendTextToEditCtrl(CEdit& e, const std::string &s) {
  118 + // get the initial text length
  119 + int nLength = e.GetWindowTextLength();
  120 + // put the selection at the end of text
  121 + e.SetSel(nLength, nLength);
  122 + // replace the selection
  123 +
  124 + std::wstring wstr = Utf8ToUtf16(s);
  125 +
  126 + // my_text_.ReplaceSel(wstr.c_str());
  127 + e.ReplaceSel(wstr.c_str());
  128 +}
  129 +
  130 +void AppendLineToMultilineEditCtrl(CEdit& e, const std::string &s) {
  131 + AppendTextToEditCtrl(e, "\r\n" + s);
  132 +}
  133 +
  134 +
  135 +CNonStreamingTextToSpeechDlg::CNonStreamingTextToSpeechDlg(CWnd* pParent /*=nullptr*/)
  136 + : CDialogEx(IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG, pParent)
  137 + {
  138 + m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
  139 +}
  140 +
  141 +void CNonStreamingTextToSpeechDlg::DoDataExchange(CDataExchange* pDX)
  142 +{
  143 + CDialogEx::DoDataExchange(pDX);
  144 + DDX_Control(pDX, IDC_HINT, my_hint_);
  145 + DDX_Control(pDX, IDC_SPEAKER, speaker_id_);
  146 + DDX_Control(pDX, IDC_SPEED, speed_);
  147 + DDX_Control(pDX, IDOK, generate_btn_);
  148 + DDX_Control(pDX, IDC_TEXT, my_text_);
  149 +}
  150 +
  151 +BEGIN_MESSAGE_MAP(CNonStreamingTextToSpeechDlg, CDialogEx)
  152 + ON_WM_SYSCOMMAND()
  153 + ON_WM_PAINT()
  154 + ON_WM_QUERYDRAGICON()
  155 + ON_BN_CLICKED(IDOK, &CNonStreamingTextToSpeechDlg::OnBnClickedOk)
  156 + END_MESSAGE_MAP()
  157 +
  158 +
  159 +// CNonStreamingTextToSpeechDlg message handlers
  160 +
  161 +BOOL CNonStreamingTextToSpeechDlg::OnInitDialog()
  162 +{
  163 + CDialogEx::OnInitDialog();
  164 +
  165 + // Add "About..." menu item to system menu.
  166 +
  167 + // IDM_ABOUTBOX must be in the system command range.
  168 + ASSERT((IDM_ABOUTBOX & 0xFFF0) == IDM_ABOUTBOX);
  169 + ASSERT(IDM_ABOUTBOX < 0xF000);
  170 +
  171 + CMenu* pSysMenu = GetSystemMenu(FALSE);
  172 + if (pSysMenu != nullptr)
  173 + {
  174 + BOOL bNameValid;
  175 + CString strAboutMenu;
  176 + bNameValid = strAboutMenu.LoadString(IDS_ABOUTBOX);
  177 + ASSERT(bNameValid);
  178 + if (!strAboutMenu.IsEmpty())
  179 + {
  180 + pSysMenu->AppendMenu(MF_SEPARATOR);
  181 + pSysMenu->AppendMenu(MF_STRING, IDM_ABOUTBOX, strAboutMenu);
  182 + }
  183 + }
  184 +
  185 + // Set the icon for this dialog. The framework does this automatically
  186 + // when the application's main window is not a dialog
  187 + SetIcon(m_hIcon, TRUE); // Set big icon
  188 + SetIcon(m_hIcon, FALSE); // Set small icon
  189 +
  190 + // TODO: Add extra initialization here
  191 + Init();
  192 +
  193 + return TRUE; // return TRUE unless you set the focus to a control
  194 +}
  195 +
  196 +void CNonStreamingTextToSpeechDlg::OnSysCommand(UINT nID, LPARAM lParam)
  197 +{
  198 + if ((nID & 0xFFF0) == IDM_ABOUTBOX)
  199 + {
  200 + CAboutDlg dlgAbout;
  201 + dlgAbout.DoModal();
  202 + }
  203 + else
  204 + {
  205 + CDialogEx::OnSysCommand(nID, lParam);
  206 + }
  207 +}
  208 +
  209 +// If you add a minimize button to your dialog, you will need the code below
  210 +// to draw the icon . For MFC applications using the document/view model,
  211 +// this is automatically done for you by the framework.
  212 +
  213 +void CNonStreamingTextToSpeechDlg::OnPaint()
  214 +{
  215 + if (IsIconic())
  216 + {
  217 + CPaintDC dc(this); // device context for painting
  218 +
  219 + SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()), 0);
  220 +
  221 + // Center icon in client rectangle
  222 + int cxIcon = GetSystemMetrics(SM_CXICON);
  223 + int cyIcon = GetSystemMetrics(SM_CYICON);
  224 + CRect rect;
  225 + GetClientRect(&rect);
  226 + int x = (rect.Width() - cxIcon + 1) / 2;
  227 + int y = (rect.Height() - cyIcon + 1) / 2;
  228 +
  229 + // Draw the icon
  230 + dc.DrawIcon(x, y, m_hIcon);
  231 + }
  232 + else
  233 + {
  234 + CDialogEx::OnPaint();
  235 + }
  236 +}
  237 +
  238 +bool Exists(const std::string &filename) {
  239 + std::ifstream is(filename);
  240 + return is.good();
  241 +}
  242 +
  243 +void CNonStreamingTextToSpeechDlg::InitHint() {
  244 + AppendLineToMultilineEditCtrl(my_hint_, "Speaker ID: Used only for multi-speaker models. Example value: 0");
  245 + AppendLineToMultilineEditCtrl(my_hint_, "Speed: Larger -> Faster in speech speed. Example value: 1.0");
  246 + AppendLineToMultilineEditCtrl(my_hint_, "\r\n\r\nPlease input your text and click the button Generate");
  247 +
  248 +}
  249 +
  250 +void CNonStreamingTextToSpeechDlg::Init() {
  251 + InitHint();
  252 + speaker_id_.SetWindowText(Utf8ToUtf16("0").c_str());
  253 + speed_.SetWindowText(Utf8ToUtf16("1.0").c_str());
  254 +
  255 + bool ok = true;
  256 + std::string error_message = "--------------------";
  257 + if (!Exists("./model.onnx")) {
  258 + error_message += "Cannot find ./model.onnx\r\n";
  259 + ok = false;
  260 + }
  261 +
  262 + if (!Exists("./lexicon.txt")) {
  263 + error_message += "Cannot find ./lexicon.txt\r\n";
  264 + ok = false;
  265 + }
  266 +
  267 + if (!Exists("./tokens.txt")) {
  268 + error_message += "Cannot find ./tokens.txt\r\n";
  269 + ok = false;
  270 + }
  271 +
  272 + if (!ok) {
  273 + generate_btn_.EnableWindow(FALSE);
  274 + error_message +=
  275 + "\r\nPlease refer to\r\n"
  276 + "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html";
  277 + error_message += "\r\nto download models.\r\n";
  278 + error_message += "\r\nWe given an example below\r\n\r\n";
  279 + error_message +=
  280 + "wget -O model.onnx "
  281 + "https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/"
  282 + "vits-aishell3.onnx\r\n";
  283 + error_message +=
  284 + "wget "
  285 + "https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/"
  286 + "lexicon.txt\r\n";
  287 + error_message +=
  288 + "wget "
  289 + "https://huggingface.co/csukuangfj/vits-zh-aishell3/resolve/main/"
  290 + "tokens.txt\r\n";
  291 +
  292 + AppendLineToMultilineEditCtrl(my_hint_, error_message);
  293 + return;
  294 + }
  295 +
  296 + // Now init tts
  297 + SherpaOnnxOfflineTtsConfig config;
  298 + memset(&config, 0, sizeof(config));
  299 + config.model.debug = 0;
  300 + config.model.num_threads = 1;
  301 + config.model.provider = "cpu";
  302 + config.model.vits.model = "./model.onnx";
  303 + config.model.vits.lexicon = "./lexicon.txt";
  304 + config.model.vits.tokens = "./tokens.txt";
  305 +
  306 + tts_ = SherpaOnnxCreateOfflineTts(&config);
  307 +}
  308 +
  309 + CNonStreamingTextToSpeechDlg::~CNonStreamingTextToSpeechDlg() {
  310 + if (tts_) {
  311 + SherpaOnnxDestroyOfflineTts(tts_);
  312 + }
  313 + }
  314 +
  315 +
  316 +
  317 +void CNonStreamingTextToSpeechDlg::OnBnClickedOk() {
  318 + // TODO: Add your control notification handler code here
  319 + CString s;
  320 + speaker_id_.GetWindowText(s);
  321 + int speaker_id = _ttoi(s);
  322 + if (speaker_id < 0) {
  323 + AfxMessageBox(Utf8ToUtf16("Please input a valid speaker ID").c_str(), MB_OK);
  324 + return;
  325 + }
  326 +
  327 + speed_.GetWindowText(s);
  328 + float speed = _ttof(s);
  329 + if (speed < 0) {
  330 + AfxMessageBox(Utf8ToUtf16("Please input a valid speed").c_str(), MB_OK);
  331 + return;
  332 + }
  333 +
  334 + my_text_.GetWindowText(s);
  335 + CT2CA pszConvertedAnsiString(s);
  336 + std::string ss(pszConvertedAnsiString);
  337 + if (ss.empty()) {
  338 + AfxMessageBox(Utf8ToUtf16("Please input your text").c_str(), MB_OK);
  339 + return;
  340 + }
  341 +
  342 +const SherpaOnnxGeneratedAudio *audio =
  343 + SherpaOnnxOfflineTtsGenerate(tts_, ss.c_str(), speaker_id, speed);
  344 + std::string filename = "./generated.wav";
  345 +int ok = SherpaOnnxWriteWave(audio->samples, audio->n, audio->sample_rate,
  346 + filename.c_str());
  347 +
  348 + SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
  349 +
  350 + if (ok) {
  351 + AfxMessageBox(Utf8ToUtf16("Saved to ./generated.wav successfully").c_str(), MB_OK);
  352 + } else {
  353 + AfxMessageBox(Utf8ToUtf16("Failed to save to ./generated.wav").c_str(), MB_OK);
  354 + }
  355 +
  356 + //CDialogEx::OnOK();
  357 +}
  1 +
  2 +// NonStreamingTextToSpeechDlg.h : header file
  3 +//
  4 +
  5 +#pragma once
  6 +
  7 +#include "sherpa-onnx/c-api/c-api.h"
  8 +
  9 +
  10 +// CNonStreamingTextToSpeechDlg dialog
  11 +class CNonStreamingTextToSpeechDlg : public CDialogEx
  12 +{
  13 +// Construction
  14 +public:
  15 + CNonStreamingTextToSpeechDlg(CWnd* pParent = nullptr); // standard constructor
  16 + ~CNonStreamingTextToSpeechDlg();
  17 +
  18 +// Dialog Data
  19 +#ifdef AFX_DESIGN_TIME
  20 + enum { IDD = IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG };
  21 +#endif
  22 +
  23 + protected:
  24 + virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support
  25 +
  26 +
  27 +// Implementation
  28 +protected:
  29 + HICON m_hIcon;
  30 +
  31 + // Generated message map functions
  32 + virtual BOOL OnInitDialog();
  33 + afx_msg void OnSysCommand(UINT nID, LPARAM lParam);
  34 + afx_msg void OnPaint();
  35 + afx_msg HCURSOR OnQueryDragIcon();
  36 + DECLARE_MESSAGE_MAP()
  37 + public:
  38 + CEdit my_hint_;
  39 + CEdit speaker_id_;
  40 + CEdit speed_;
  41 + void Init();
  42 + void InitHint();
  43 + CButton generate_btn_;
  44 + afx_msg void OnBnClickedOk();
  45 +
  46 + SherpaOnnxOfflineTts *tts_;
  47 + CEdit my_text_;
  48 +};
  1 +//{{NO_DEPENDENCIES}}
  2 +// Microsoft Visual C++ generated include file.
  3 +// Used by NonStreamingTextToSpeech.rc
  4 +//
  5 +#define IDM_ABOUTBOX 0x0010
  6 +#define IDD_ABOUTBOX 100
  7 +#define IDS_ABOUTBOX 101
  8 +#define IDD_NONSTREAMINGTEXTTOSPEECH_DIALOG 102
  9 +#define IDR_MAINFRAME 128
  10 +#define IDC_SPEAKER 1000
  11 +#define IDC_SPEED 1003
  12 +#define IDC_TEXT 1004
  13 +#define IDC_HINT 1005
  14 +
  15 +// Next default values for new objects
  16 +//
  17 +#ifdef APSTUDIO_INVOKED
  18 +#ifndef APSTUDIO_READONLY_SYMBOLS
  19 +#define _APS_NEXT_RESOURCE_VALUE 130
  20 +#define _APS_NEXT_COMMAND_VALUE 32771
  21 +#define _APS_NEXT_CONTROL_VALUE 1006
  22 +#define _APS_NEXT_SYMED_VALUE 101
  23 +#endif
  24 +#endif
  1 +#pragma once
  2 +
  3 +#ifndef VC_EXTRALEAN
  4 +#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
  5 +#endif
  6 +
  7 +#include "targetver.h"
  8 +
  9 +#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be explicit
  10 +
  11 +// turns off MFC's hiding of some common and often safely ignored warning messages
  12 +#define _AFX_ALL_WARNINGS
  13 +
  14 +#include <afxwin.h> // MFC core and standard components
  15 +#include <afxext.h> // MFC extensions
  16 +
  17 +
  18 +
  19 +
  20 +
  21 +#ifndef _AFX_NO_OLE_SUPPORT
  22 +#include <afxdtctl.h> // MFC support for Internet Explorer 4 Common Controls
  23 +#endif
  24 +#ifndef _AFX_NO_AFXCMN_SUPPORT
  25 +#include <afxcmn.h> // MFC support for Windows Common Controls
  26 +#endif // _AFX_NO_AFXCMN_SUPPORT
  27 +
  28 +#include <afxcontrolbars.h> // MFC support for ribbons and control bars
  29 +
  30 +
  31 +
  32 +
  33 +
  34 +
  35 +
  36 +
  37 +
  38 +
  39 +
  1 +// pch.cpp: source file corresponding to the pre-compiled header
  2 +
  3 +#include "pch.h"
  4 +
  5 +// When you are using pre-compiled headers, this source file is necessary for compilation to succeed.
  1 +// pch.h: This is a precompiled header file.
  2 +// Files listed below are compiled only once, improving build performance for future builds.
  3 +// This also affects IntelliSense performance, including code completion and many code browsing features.
  4 +// However, files listed here are ALL re-compiled if any one of them is updated between builds.
  5 +// Do not add files here that you will be updating frequently as this negates the performance advantage.
  6 +
  7 +#ifndef PCH_H
  8 +#define PCH_H
  9 +
  10 +// add headers that you want to pre-compile here
  11 +#include "framework.h"
  12 +
  13 +#endif //PCH_H
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 + <ImportGroup Label="PropertySheets" />
  4 + <PropertyGroup Label="UserMacros" />
  5 + <PropertyGroup>
  6 + <SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
  7 + <SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
  8 + <SherpaOnnxLibraries>
  9 + sherpa-onnx-portaudio_static.lib;
  10 + sherpa-onnx-c-api.lib;
  11 + sherpa-onnx-core.lib;
  12 + kaldi-decoder-core.lib;
  13 + sherpa-onnx-kaldifst-core.lib;
  14 + sherpa-onnx-fst.lib;
  15 + kaldi-native-fbank-core.lib;
  16 + absl_base.lib;
  17 + absl_city.lib;
  18 + absl_hash.lib;
  19 + absl_low_level_hash.lib;
  20 + absl_raw_hash_set.lib;
  21 + absl_raw_logging_internal.lib;
  22 + absl_throw_delegate.lib;
  23 + clog.lib;
  24 + cpuinfo.lib;
  25 + flatbuffers.lib;
  26 + libprotobuf-lite.lib;
  27 + onnx.lib;
  28 + onnx_proto.lib;
  29 + onnxruntime_common.lib;
  30 + onnxruntime_flatbuffers.lib;
  31 + onnxruntime_framework.lib;
  32 + onnxruntime_graph.lib;
  33 + onnxruntime_mlas.lib;
  34 + onnxruntime_optimizer.lib;
  35 + onnxruntime_providers.lib;
  36 + onnxruntime_session.lib;
  37 + onnxruntime_util.lib;
  38 + re2.lib;
  39 + </SherpaOnnxLibraries>
  40 + </PropertyGroup>
  41 + <ItemDefinitionGroup>
  42 + <ClCompile>
  43 + <AdditionalIncludeDirectories>
  44 + $(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
  45 + $(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  46 + </ClCompile>
  47 + <Link>
  48 + <AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
  49 + <AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
  50 + </Link>
  51 + </ItemDefinitionGroup>
  52 + <ItemGroup />
  53 +</Project>
  1 +#pragma once
  2 +
  3 +// Including SDKDDKVer.h defines the highest available Windows platform.
  4 +
  5 +// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and
  6 +// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h.
  7 +
  8 +#include <SDKDDKVer.h>
@@ -7,6 +7,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition" @@ -7,6 +7,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition"
7 EndProject 7 EndProject
8 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}" 8 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
9 EndProject 9 EndProject
  10 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingTextToSpeech", "NonStreamingTextToSpeech\NonStreamingTextToSpeech.vcxproj", "{9A5F2CCC-1AAB-4F7F-A608-F0B512023405}"
  11 +EndProject
10 Global 12 Global
11 GlobalSection(SolutionConfigurationPlatforms) = preSolution 13 GlobalSection(SolutionConfigurationPlatforms) = preSolution
12 Debug|x64 = Debug|x64 14 Debug|x64 = Debug|x64
@@ -31,6 +33,14 @@ Global @@ -31,6 +33,14 @@ Global
31 {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64 33 {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
32 {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32 34 {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
33 {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32 35 {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
  36 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x64.ActiveCfg = Debug|x64
  37 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x64.Build.0 = Debug|x64
  38 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x86.ActiveCfg = Debug|Win32
  39 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Debug|x86.Build.0 = Debug|Win32
  40 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x64.ActiveCfg = Release|x64
  41 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x64.Build.0 = Release|x64
  42 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x86.ActiveCfg = Release|Win32
  43 + {9A5F2CCC-1AAB-4F7F-A608-F0B512023405}.Release|x86.Build.0 = Release|Win32
34 EndGlobalSection 44 EndGlobalSection
35 GlobalSection(SolutionProperties) = preSolution 45 GlobalSection(SolutionProperties) = preSolution
36 HideSolutionNode = FALSE 46 HideSolutionNode = FALSE
@@ -572,11 +572,11 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { @@ -572,11 +572,11 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
572 return tts 572 return tts
573 } 573 }
574 574
575 -func (tts *OfflineTts) Generate(text string, sid int) *GeneratedAudio { 575 +func (tts *OfflineTts) Generate(text string, sid int, speed float32) *GeneratedAudio {
576 s := C.CString(text) 576 s := C.CString(text)
577 defer C.free(unsafe.Pointer(s)) 577 defer C.free(unsafe.Pointer(s))
578 578
579 - audio := C.SherpaOnnxOfflineTtsGenerate(tts.impl, s, C.int(sid)) 579 + audio := C.SherpaOnnxOfflineTtsGenerate(tts.impl, s, C.int(sid), C.float(speed))
580 defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio) 580 defer C.SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
581 581
582 ans := &GeneratedAudio{} 582 ans := &GeneratedAudio{}
@@ -568,8 +568,9 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts( @@ -568,8 +568,9 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
568 void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; } 568 void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) { delete tts; }
569 569
570 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate( 570 const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
571 - const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) {  
572 - sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid); 571 + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
  572 + float speed) {
  573 + sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid, speed);
573 574
574 if (audio.samples.empty()) { 575 if (audio.samples.empty()) {
575 return nullptr; 576 return nullptr;
@@ -639,7 +639,8 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts); @@ -639,7 +639,8 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
639 // The user has to use DestroyOfflineTtsGeneratedAudio() to free the returned 639 // The user has to use DestroyOfflineTtsGeneratedAudio() to free the returned
640 // pointer to avoid memory leak. 640 // pointer to avoid memory leak.
641 SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate( 641 SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
642 - const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid); 642 + const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
  643 + float speed);
643 644
644 SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio( 645 SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
645 const SherpaOnnxGeneratedAudio *p); 646 const SherpaOnnxGeneratedAudio *p);
@@ -18,8 +18,8 @@ class OfflineTtsImpl { @@ -18,8 +18,8 @@ class OfflineTtsImpl {
18 18
19 static std::unique_ptr<OfflineTtsImpl> Create(const OfflineTtsConfig &config); 19 static std::unique_ptr<OfflineTtsImpl> Create(const OfflineTtsConfig &config);
20 20
21 - virtual GeneratedAudio Generate(const std::string &text,  
22 - int64_t sid = 0) const = 0; 21 + virtual GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
  22 + float speed = 1.0) const = 0;
23 }; 23 };
24 24
25 } // namespace sherpa_onnx 25 } // namespace sherpa_onnx
@@ -24,8 +24,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -24,8 +24,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
24 model_->Punctuations(), model_->Language(), 24 model_->Punctuations(), model_->Language(),
25 config.model.debug) {} 25 config.model.debug) {}
26 26
27 - GeneratedAudio Generate(const std::string &text,  
28 - int64_t sid = 0) const override { 27 + GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
  28 + float speed = 1.0) const override {
29 int32_t num_speakers = model_->NumSpeakers(); 29 int32_t num_speakers = model_->NumSpeakers();
30 if (num_speakers == 0 && sid != 0) { 30 if (num_speakers == 0 && sid != 0) {
31 SHERPA_ONNX_LOGE( 31 SHERPA_ONNX_LOGE(
@@ -66,7 +66,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl { @@ -66,7 +66,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
66 Ort::Value x_tensor = Ort::Value::CreateTensor( 66 Ort::Value x_tensor = Ort::Value::CreateTensor(
67 memory_info, x.data(), x.size(), x_shape.data(), x_shape.size()); 67 memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());
68 68
69 - Ort::Value audio = model_->Run(std::move(x_tensor), sid); 69 + Ort::Value audio = model_->Run(std::move(x_tensor), sid, speed);
70 70
71 std::vector<int64_t> audio_shape = 71 std::vector<int64_t> audio_shape =
72 audio.GetTensorTypeAndShapeInfo().GetShape(); 72 audio.GetTensorTypeAndShapeInfo().GetShape();
@@ -17,7 +17,7 @@ void OfflineTtsVitsModelConfig::Register(ParseOptions *po) { @@ -17,7 +17,7 @@ void OfflineTtsVitsModelConfig::Register(ParseOptions *po) {
17 po->Register("vits-noise-scale-w", &noise_scale_w, 17 po->Register("vits-noise-scale-w", &noise_scale_w,
18 "noise_scale_w for VITS models"); 18 "noise_scale_w for VITS models");
19 po->Register("vits-length-scale", &length_scale, 19 po->Register("vits-length-scale", &length_scale,
20 - "length_scale for VITS models"); 20 + "Speech speed. Larger->Slower; Smaller->faster.");
21 } 21 }
22 22
23 bool OfflineTtsVitsModelConfig::Validate() const { 23 bool OfflineTtsVitsModelConfig::Validate() const {
@@ -26,7 +26,7 @@ class OfflineTtsVitsModel::Impl { @@ -26,7 +26,7 @@ class OfflineTtsVitsModel::Impl {
26 Init(buf.data(), buf.size()); 26 Init(buf.data(), buf.size());
27 } 27 }
28 28
29 - Ort::Value Run(Ort::Value x, int64_t sid) { 29 + Ort::Value Run(Ort::Value x, int64_t sid, float speed) {
30 auto memory_info = 30 auto memory_info =
31 Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); 31 Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
32 32
@@ -48,6 +48,10 @@ class OfflineTtsVitsModel::Impl { @@ -48,6 +48,10 @@ class OfflineTtsVitsModel::Impl {
48 float length_scale = config_.vits.length_scale; 48 float length_scale = config_.vits.length_scale;
49 float noise_scale_w = config_.vits.noise_scale_w; 49 float noise_scale_w = config_.vits.noise_scale_w;
50 50
  51 + if (speed != 1 && speed > 0) {
  52 + length_scale = 1. / speed;
  53 + }
  54 +
51 Ort::Value noise_scale_tensor = 55 Ort::Value noise_scale_tensor =
52 Ort::Value::CreateTensor(memory_info, &noise_scale, 1, &scale_shape, 1); 56 Ort::Value::CreateTensor(memory_info, &noise_scale, 1, &scale_shape, 1);
53 57
@@ -139,8 +143,9 @@ OfflineTtsVitsModel::OfflineTtsVitsModel(const OfflineTtsModelConfig &config) @@ -139,8 +143,9 @@ OfflineTtsVitsModel::OfflineTtsVitsModel(const OfflineTtsModelConfig &config)
139 143
140 OfflineTtsVitsModel::~OfflineTtsVitsModel() = default; 144 OfflineTtsVitsModel::~OfflineTtsVitsModel() = default;
141 145
142 -Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/) {  
143 - return impl_->Run(std::move(x), sid); 146 +Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/,
  147 + float speed /*= 1.0*/) {
  148 + return impl_->Run(std::move(x), sid, speed);
144 } 149 }
145 150
146 int32_t OfflineTtsVitsModel::SampleRate() const { return impl_->SampleRate(); } 151 int32_t OfflineTtsVitsModel::SampleRate() const { return impl_->SampleRate(); }
@@ -29,7 +29,7 @@ class OfflineTtsVitsModel { @@ -29,7 +29,7 @@ class OfflineTtsVitsModel {
29 * @return Return a float32 tensor containing audio samples. You can flatten 29 * @return Return a float32 tensor containing audio samples. You can flatten
30 * it to a 1-D tensor. 30 * it to a 1-D tensor.
31 */ 31 */
32 - Ort::Value Run(Ort::Value x, int64_t sid = 0); 32 + Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0);
33 33
34 // Sample rate of the generated audio 34 // Sample rate of the generated audio
35 int32_t SampleRate() const; 35 int32_t SampleRate() const;
@@ -28,9 +28,9 @@ OfflineTts::OfflineTts(const OfflineTtsConfig &config) @@ -28,9 +28,9 @@ OfflineTts::OfflineTts(const OfflineTtsConfig &config)
28 28
29 OfflineTts::~OfflineTts() = default; 29 OfflineTts::~OfflineTts() = default;
30 30
31 -GeneratedAudio OfflineTts::Generate(const std::string &text,  
32 - int64_t sid /*=0*/) const {  
33 - return impl_->Generate(text, sid); 31 +GeneratedAudio OfflineTts::Generate(const std::string &text, int64_t sid /*=0*/,
  32 + float speed /*= 1.0*/) const {
  33 + return impl_->Generate(text, sid, speed);
34 } 34 }
35 35
36 } // namespace sherpa_onnx 36 } // namespace sherpa_onnx
@@ -43,7 +43,8 @@ class OfflineTts { @@ -43,7 +43,8 @@ class OfflineTts {
43 // trained using the VCTK dataset. It is not used for 43 // trained using the VCTK dataset. It is not used for
44 // single-speaker models, e.g., models trained using the ljspeech 44 // single-speaker models, e.g., models trained using the ljspeech
45 // dataset. 45 // dataset.
46 - GeneratedAudio Generate(const std::string &text, int64_t sid = 0) const; 46 + GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
  47 + float speed = 1.0) const;
47 48
48 private: 49 private:
49 std::unique_ptr<OfflineTtsImpl> impl_; 50 std::unique_ptr<OfflineTtsImpl> impl_;
@@ -40,7 +40,8 @@ void PybindOfflineTts(py::module *m) { @@ -40,7 +40,8 @@ void PybindOfflineTts(py::module *m) {
40 using PyClass = OfflineTts; 40 using PyClass = OfflineTts;
41 py::class_<PyClass>(*m, "OfflineTts") 41 py::class_<PyClass>(*m, "OfflineTts")
42 .def(py::init<const OfflineTtsConfig &>(), py::arg("config")) 42 .def(py::init<const OfflineTtsConfig &>(), py::arg("config"))
43 - .def("generate", &PyClass::Generate, py::arg("text"), py::arg("sid") = 0); 43 + .def("generate", &PyClass::Generate, py::arg("text"), py::arg("sid") = 0,
  44 + py::arg("speed") = 1.0);
44 } 45 }
45 46
46 } // namespace sherpa_onnx 47 } // namespace sherpa_onnx