Committed by
GitHub
Add non-streaming speech recognition examples for MFC (#212)
正在显示
22 个修改的文件
包含
1153 行增加
和
63 行删除
| @@ -98,6 +98,7 @@ jobs: | @@ -98,6 +98,7 @@ jobs: | ||
| 98 | 98 | ||
| 99 | cd mfc-examples/$arch/Release | 99 | cd mfc-examples/$arch/Release |
| 100 | cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe | 100 | cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe |
| 101 | + cp NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe | ||
| 101 | ls -lh | 102 | ls -lh |
| 102 | 103 | ||
| 103 | - name: Upload artifact | 104 | - name: Upload artifact |
| @@ -106,10 +107,24 @@ jobs: | @@ -106,10 +107,24 @@ jobs: | ||
| 106 | name: streaming-speech-recognition-${{ matrix.arch }} | 107 | name: streaming-speech-recognition-${{ matrix.arch }} |
| 107 | path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe | 108 | path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe |
| 108 | 109 | ||
| 109 | - - name: Release pre-compiled binaries and libs for macOS | 110 | + - name: Upload artifact |
| 111 | + uses: actions/upload-artifact@v2 | ||
| 112 | + with: | ||
| 113 | + name: non-streaming-speech-recognition-${{ matrix.arch }} | ||
| 114 | + path: ./mfc-examples/${{ matrix.arch }}/Release/NonStreamingSpeechRecognition.exe | ||
| 115 | + | ||
| 116 | + - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }} | ||
| 117 | + if: env.RELEASE == 'true' | ||
| 118 | + uses: svenstaro/upload-release-action@v2 | ||
| 119 | + with: | ||
| 120 | + file_glob: true | ||
| 121 | + overwrite: true | ||
| 122 | + file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-*.exe | ||
| 123 | + | ||
| 124 | + - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }} | ||
| 110 | if: env.RELEASE == 'true' | 125 | if: env.RELEASE == 'true' |
| 111 | uses: svenstaro/upload-release-action@v2 | 126 | uses: svenstaro/upload-release-action@v2 |
| 112 | with: | 127 | with: |
| 113 | file_glob: true | 128 | file_glob: true |
| 114 | overwrite: true | 129 | overwrite: true |
| 115 | - file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx*.exe | 130 | + file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-*.exe |
| @@ -113,7 +113,7 @@ function(download_onnxruntime) | @@ -113,7 +113,7 @@ function(download_onnxruntime) | ||
| 113 | 113 | ||
| 114 | set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2") | 114 | set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2") |
| 115 | set(onnxruntime_URL2 "") | 115 | set(onnxruntime_URL2 "") |
| 116 | - set(onnxruntime_HASH "SHA256=a2b33a3e8a1f89cddf303f0a97a5a88f4202579c653cfb29158c8cf7da3734eb") | 116 | + set(onnxruntime_HASH "SHA256=94d9a30976b5c4a5dff7508d00f141835916e5a36315d5f53be9b3edb85148b5") |
| 117 | endif() | 117 | endif() |
| 118 | 118 | ||
| 119 | if(SHERPA_ONNX_ENABLE_GPU) | 119 | if(SHERPA_ONNX_ENABLE_GPU) |
| @@ -161,7 +161,7 @@ function(download_onnxruntime) | @@ -161,7 +161,7 @@ function(download_onnxruntime) | ||
| 161 | 161 | ||
| 162 | set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2") | 162 | set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2") |
| 163 | set(onnxruntime_URL2 "") | 163 | set(onnxruntime_URL2 "") |
| 164 | - set(onnxruntime_HASH "SHA256=f5c19ac1fc6a61c78a231a41df10aede2586665ab397bdc3f007eb8d2c8d4a19") | 164 | + set(onnxruntime_HASH "SHA256=c809a8510a89b8b37ae7d563c39229db22bac8fbefcbfe5c81a60b367d065b1b") |
| 165 | endif() | 165 | endif() |
| 166 | endif() | 166 | endif() |
| 167 | # After downloading, it contains: | 167 | # After downloading, it contains: |
| 1 | + | ||
| 2 | +// NonStreamingSpeechRecognition.cpp : Defines the class behaviors for the | ||
| 3 | +// application. | ||
| 4 | +// | ||
| 5 | + | ||
| 6 | +// clang-format off | ||
| 7 | +#include "pch.h" | ||
| 8 | +#include "framework.h" | ||
| 9 | +#include "NonStreamingSpeechRecognitionDlg.h" | ||
| 10 | +#include "NonStreamingSpeechRecognition.h" | ||
| 11 | +// clang-format on | ||
| 12 | + | ||
| 13 | +#ifdef _DEBUG | ||
| 14 | +#define new DEBUG_NEW | ||
| 15 | +#endif | ||
| 16 | + | ||
| 17 | +// CNonStreamingSpeechRecognitionApp | ||
| 18 | + | ||
| 19 | +BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionApp, CWinApp) | ||
| 20 | +ON_COMMAND(ID_HELP, &CWinApp::OnHelp) | ||
| 21 | +END_MESSAGE_MAP() | ||
| 22 | + | ||
| 23 | +// CNonStreamingSpeechRecognitionApp construction | ||
| 24 | + | ||
| 25 | +CNonStreamingSpeechRecognitionApp::CNonStreamingSpeechRecognitionApp() { | ||
| 26 | + // TODO: add construction code here, | ||
| 27 | + // Place all significant initialization in InitInstance | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | +// The one and only CNonStreamingSpeechRecognitionApp object | ||
| 31 | + | ||
| 32 | +CNonStreamingSpeechRecognitionApp theApp; | ||
| 33 | + | ||
| 34 | +// CNonStreamingSpeechRecognitionApp initialization | ||
| 35 | + | ||
| 36 | +BOOL CNonStreamingSpeechRecognitionApp::InitInstance() { | ||
| 37 | + CWinApp::InitInstance(); | ||
| 38 | + | ||
| 39 | + // Create the shell manager, in case the dialog contains | ||
| 40 | + // any shell tree view or shell list view controls. | ||
| 41 | + CShellManager *pShellManager = new CShellManager; | ||
| 42 | + | ||
| 43 | + // Activate "Windows Native" visual manager for enabling themes in MFC | ||
| 44 | + // controls | ||
| 45 | + CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows)); | ||
| 46 | + | ||
| 47 | + // Standard initialization | ||
| 48 | + // If you are not using these features and wish to reduce the size | ||
| 49 | + // of your final executable, you should remove from the following | ||
| 50 | + // the specific initialization routines you do not need | ||
| 51 | + // Change the registry key under which our settings are stored | ||
| 52 | + // TODO: You should modify this string to be something appropriate | ||
| 53 | + // such as the name of your company or organization | ||
| 54 | + SetRegistryKey(_T("Local AppWizard-Generated Applications")); | ||
| 55 | + | ||
| 56 | + CNonStreamingSpeechRecognitionDlg dlg; | ||
| 57 | + m_pMainWnd = &dlg; | ||
| 58 | + INT_PTR nResponse = dlg.DoModal(); | ||
| 59 | + if (nResponse == IDOK) { | ||
| 60 | + // TODO: Place code here to handle when the dialog is | ||
| 61 | + // dismissed with OK | ||
| 62 | + } else if (nResponse == IDCANCEL) { | ||
| 63 | + // TODO: Place code here to handle when the dialog is | ||
| 64 | + // dismissed with Cancel | ||
| 65 | + } else if (nResponse == -1) { | ||
| 66 | + TRACE(traceAppMsg, 0, | ||
| 67 | + "Warning: dialog creation failed, so application is terminating " | ||
| 68 | + "unexpectedly.\n"); | ||
| 69 | + TRACE(traceAppMsg, 0, | ||
| 70 | + "Warning: if you are using MFC controls on the dialog, you cannot " | ||
| 71 | + "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n"); | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + // Delete the shell manager created above. | ||
| 75 | + if (pShellManager != nullptr) { | ||
| 76 | + delete pShellManager; | ||
| 77 | + } | ||
| 78 | + | ||
| 79 | +#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS) | ||
| 80 | + ControlBarCleanUp(); | ||
| 81 | +#endif | ||
| 82 | + | ||
| 83 | + // Since the dialog has been closed, return FALSE so that we exit the | ||
| 84 | + // application, rather than start the application's message pump. | ||
| 85 | + return FALSE; | ||
| 86 | +} |
| 1 | + | ||
// NonStreamingSpeechRecognition.h : main header file for the
// NonStreamingSpeechRecognition application
| 4 | +// | ||
| 5 | + | ||
| 6 | +#pragma once | ||
| 7 | + | ||
| 8 | +#ifndef __AFXWIN_H__ | ||
| 9 | +#error "include 'pch.h' before including this file for PCH" | ||
| 10 | +#endif | ||
| 11 | + | ||
| 12 | +#include "resource.h" // main symbols | ||
| 13 | + | ||
| 14 | +// CNonStreamingSpeechRecognitionApp: | ||
| 15 | +// See NonStreamingSpeechRecognition.cpp for the implementation of this class | ||
| 16 | +// | ||
| 17 | + | ||
| 18 | +class CNonStreamingSpeechRecognitionApp : public CWinApp { | ||
| 19 | + public: | ||
| 20 | + CNonStreamingSpeechRecognitionApp(); | ||
| 21 | + | ||
| 22 | + // Overrides | ||
| 23 | + public: | ||
| 24 | + virtual BOOL InitInstance(); | ||
| 25 | + | ||
| 26 | + // Implementation | ||
| 27 | + | ||
| 28 | + DECLARE_MESSAGE_MAP() | ||
| 29 | +}; | ||
| 30 | + | ||
| 31 | +extern CNonStreamingSpeechRecognitionApp theApp; |
| 1 | +<?xml version="1.0" encoding="utf-8"?> | ||
| 2 | +<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
| 3 | + <ItemGroup Label="ProjectConfigurations"> | ||
| 4 | + <ProjectConfiguration Include="Debug|Win32"> | ||
| 5 | + <Configuration>Debug</Configuration> | ||
| 6 | + <Platform>Win32</Platform> | ||
| 7 | + </ProjectConfiguration> | ||
| 8 | + <ProjectConfiguration Include="Release|Win32"> | ||
| 9 | + <Configuration>Release</Configuration> | ||
| 10 | + <Platform>Win32</Platform> | ||
| 11 | + </ProjectConfiguration> | ||
| 12 | + <ProjectConfiguration Include="Debug|x64"> | ||
| 13 | + <Configuration>Debug</Configuration> | ||
| 14 | + <Platform>x64</Platform> | ||
| 15 | + </ProjectConfiguration> | ||
| 16 | + <ProjectConfiguration Include="Release|x64"> | ||
| 17 | + <Configuration>Release</Configuration> | ||
| 18 | + <Platform>x64</Platform> | ||
| 19 | + </ProjectConfiguration> | ||
| 20 | + </ItemGroup> | ||
| 21 | + <PropertyGroup Label="Globals"> | ||
| 22 | + <VCProjectVersion>17.0</VCProjectVersion> | ||
| 23 | + <ProjectGuid>{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}</ProjectGuid> | ||
| 24 | + <Keyword>MFCProj</Keyword> | ||
| 25 | + <RootNamespace>NonStreamingSpeechRecognition</RootNamespace> | ||
| 26 | + <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion> | ||
| 27 | + </PropertyGroup> | ||
| 28 | + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> | ||
| 29 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> | ||
| 30 | + <ConfigurationType>Application</ConfigurationType> | ||
| 31 | + <UseDebugLibraries>true</UseDebugLibraries> | ||
| 32 | + <PlatformToolset>v143</PlatformToolset> | ||
| 33 | + <CharacterSet>Unicode</CharacterSet> | ||
| 34 | + <UseOfMfc>Static</UseOfMfc> | ||
| 35 | + </PropertyGroup> | ||
| 36 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> | ||
| 37 | + <ConfigurationType>Application</ConfigurationType> | ||
| 38 | + <UseDebugLibraries>false</UseDebugLibraries> | ||
| 39 | + <PlatformToolset>v143</PlatformToolset> | ||
| 40 | + <WholeProgramOptimization>true</WholeProgramOptimization> | ||
| 41 | + <CharacterSet>Unicode</CharacterSet> | ||
| 42 | + <UseOfMfc>Static</UseOfMfc> | ||
| 43 | + </PropertyGroup> | ||
| 44 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> | ||
| 45 | + <ConfigurationType>Application</ConfigurationType> | ||
| 46 | + <UseDebugLibraries>true</UseDebugLibraries> | ||
| 47 | + <PlatformToolset>v143</PlatformToolset> | ||
| 48 | + <CharacterSet>Unicode</CharacterSet> | ||
| 49 | + <UseOfMfc>Static</UseOfMfc> | ||
| 50 | + </PropertyGroup> | ||
| 51 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> | ||
| 52 | + <ConfigurationType>Application</ConfigurationType> | ||
| 53 | + <UseDebugLibraries>false</UseDebugLibraries> | ||
| 54 | + <PlatformToolset>v143</PlatformToolset> | ||
| 55 | + <WholeProgramOptimization>true</WholeProgramOptimization> | ||
| 56 | + <CharacterSet>Unicode</CharacterSet> | ||
| 57 | + <UseOfMfc>Static</UseOfMfc> | ||
| 58 | + </PropertyGroup> | ||
| 59 | + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> | ||
| 60 | + <ImportGroup Label="ExtensionSettings"> | ||
| 61 | + </ImportGroup> | ||
| 62 | + <ImportGroup Label="Shared"> | ||
| 63 | + </ImportGroup> | ||
| 64 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
| 65 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| 66 | + <Import Project="sherpa-onnx-deps.props" /> | ||
| 67 | + </ImportGroup> | ||
| 68 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
| 69 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| 70 | + <Import Project="sherpa-onnx-deps.props" /> | ||
| 71 | + </ImportGroup> | ||
| 72 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
| 73 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| 74 | + <Import Project="sherpa-onnx-deps.props" /> | ||
| 75 | + </ImportGroup> | ||
| 76 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
| 77 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
| 78 | + <Import Project="sherpa-onnx-deps.props" /> | ||
| 79 | + </ImportGroup> | ||
| 80 | + <PropertyGroup Label="UserMacros" /> | ||
| 81 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
| 82 | + <LinkIncremental>false</LinkIncremental> | ||
| 83 | + </PropertyGroup> | ||
| 84 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
| 85 | + <LinkIncremental>true</LinkIncremental> | ||
| 86 | + </PropertyGroup> | ||
| 87 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
| 88 | + <LinkIncremental>true</LinkIncremental> | ||
| 89 | + </PropertyGroup> | ||
| 90 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
| 91 | + <LinkIncremental>false</LinkIncremental> | ||
| 92 | + </PropertyGroup> | ||
| 93 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
| 94 | + <ClCompile> | ||
| 95 | + <PrecompiledHeader>Use</PrecompiledHeader> | ||
| 96 | + <WarningLevel>Level3</WarningLevel> | ||
| 97 | + <FunctionLevelLinking>true</FunctionLevelLinking> | ||
| 98 | + <IntrinsicFunctions>true</IntrinsicFunctions> | ||
| 99 | + <SDLCheck>true</SDLCheck> | ||
| 100 | + <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 101 | + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile> | ||
| 102 | + </ClCompile> | ||
| 103 | + <Link> | ||
| 104 | + <SubSystem>Windows</SubSystem> | ||
| 105 | + <EnableCOMDATFolding>true</EnableCOMDATFolding> | ||
| 106 | + <OptimizeReferences>true</OptimizeReferences> | ||
| 107 | + </Link> | ||
| 108 | + <Midl> | ||
| 109 | + <MkTypLibCompatible>false</MkTypLibCompatible> | ||
| 110 | + <ValidateAllParameters>true</ValidateAllParameters> | ||
| 111 | + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 112 | + </Midl> | ||
| 113 | + <ResourceCompile> | ||
| 114 | + <Culture>0x0409</Culture> | ||
| 115 | + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 116 | + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
| 117 | + </ResourceCompile> | ||
| 118 | + </ItemDefinitionGroup> | ||
| 119 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
| 120 | + <ClCompile> | ||
| 121 | + <PrecompiledHeader>Use</PrecompiledHeader> | ||
| 122 | + <WarningLevel>Level3</WarningLevel> | ||
| 123 | + <SDLCheck>true</SDLCheck> | ||
| 124 | + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 125 | + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile> | ||
| 126 | + </ClCompile> | ||
| 127 | + <Link> | ||
| 128 | + <SubSystem>Windows</SubSystem> | ||
| 129 | + </Link> | ||
| 130 | + <Midl> | ||
| 131 | + <MkTypLibCompatible>false</MkTypLibCompatible> | ||
| 132 | + <ValidateAllParameters>true</ValidateAllParameters> | ||
| 133 | + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 134 | + </Midl> | ||
| 135 | + <ResourceCompile> | ||
| 136 | + <Culture>0x0409</Culture> | ||
| 137 | + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 138 | + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
| 139 | + </ResourceCompile> | ||
| 140 | + </ItemDefinitionGroup> | ||
| 141 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
| 142 | + <ClCompile> | ||
| 143 | + <PrecompiledHeader>Use</PrecompiledHeader> | ||
| 144 | + <WarningLevel>Level3</WarningLevel> | ||
| 145 | + <SDLCheck>true</SDLCheck> | ||
| 146 | + <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 147 | + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile> | ||
| 148 | + </ClCompile> | ||
| 149 | + <Link> | ||
| 150 | + <SubSystem>Windows</SubSystem> | ||
| 151 | + </Link> | ||
| 152 | + <Midl> | ||
| 153 | + <MkTypLibCompatible>false</MkTypLibCompatible> | ||
| 154 | + <ValidateAllParameters>true</ValidateAllParameters> | ||
| 155 | + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 156 | + </Midl> | ||
| 157 | + <ResourceCompile> | ||
| 158 | + <Culture>0x0409</Culture> | ||
| 159 | + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 160 | + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
| 161 | + </ResourceCompile> | ||
| 162 | + </ItemDefinitionGroup> | ||
| 163 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
| 164 | + <ClCompile> | ||
| 165 | + <PrecompiledHeader>Use</PrecompiledHeader> | ||
| 166 | + <WarningLevel>Level3</WarningLevel> | ||
| 167 | + <FunctionLevelLinking>true</FunctionLevelLinking> | ||
| 168 | + <IntrinsicFunctions>true</IntrinsicFunctions> | ||
| 169 | + <SDLCheck>true</SDLCheck> | ||
| 170 | + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 171 | + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile> | ||
| 172 | + </ClCompile> | ||
| 173 | + <Link> | ||
| 174 | + <SubSystem>Windows</SubSystem> | ||
| 175 | + <EnableCOMDATFolding>true</EnableCOMDATFolding> | ||
| 176 | + <OptimizeReferences>true</OptimizeReferences> | ||
| 177 | + </Link> | ||
| 178 | + <Midl> | ||
| 179 | + <MkTypLibCompatible>false</MkTypLibCompatible> | ||
| 180 | + <ValidateAllParameters>true</ValidateAllParameters> | ||
| 181 | + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 182 | + </Midl> | ||
| 183 | + <ResourceCompile> | ||
| 184 | + <Culture>0x0409</Culture> | ||
| 185 | + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
| 186 | + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
| 187 | + </ResourceCompile> | ||
| 188 | + </ItemDefinitionGroup> | ||
| 189 | + <ItemGroup> | ||
| 190 | + <ClInclude Include="framework.h" /> | ||
| 191 | + <ClInclude Include="NonStreamingSpeechRecognition.h" /> | ||
| 192 | + <ClInclude Include="NonStreamingSpeechRecognitionDlg.h" /> | ||
| 193 | + <ClInclude Include="pch.h" /> | ||
| 194 | + <ClInclude Include="Resource.h" /> | ||
| 195 | + <ClInclude Include="targetver.h" /> | ||
| 196 | + </ItemGroup> | ||
| 197 | + <ItemGroup> | ||
| 198 | + <ClCompile Include="NonStreamingSpeechRecognition.cpp" /> | ||
| 199 | + <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp" /> | ||
| 200 | + <ClCompile Include="pch.cpp"> | ||
| 201 | + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader> | ||
| 202 | + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader> | ||
| 203 | + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader> | ||
| 204 | + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader> | ||
| 205 | + </ClCompile> | ||
| 206 | + </ItemGroup> | ||
| 207 | + <ItemGroup> | ||
| 208 | + <ResourceCompile Include="NonStreamingSpeechRecognition.rc" /> | ||
| 209 | + </ItemGroup> | ||
| 210 | + <ItemGroup> | ||
| 211 | + <None Include="res\NonStreamingSpeechRecognition.rc2" /> | ||
| 212 | + </ItemGroup> | ||
| 213 | + <ItemGroup> | ||
| 214 | + <Image Include="res\NonStreamingSpeechRecognition.ico" /> | ||
| 215 | + </ItemGroup> | ||
| 216 | + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> | ||
| 217 | + <ImportGroup Label="ExtensionTargets"> | ||
| 218 | + </ImportGroup> | ||
| 219 | +</Project> |
| 1 | +<?xml version="1.0" encoding="utf-8"?> | ||
| 2 | +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
| 3 | + <ItemGroup> | ||
| 4 | + <Filter Include="Source Files"> | ||
| 5 | + <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> | ||
| 6 | + <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions> | ||
| 7 | + </Filter> | ||
| 8 | + <Filter Include="Header Files"> | ||
| 9 | + <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> | ||
| 10 | + <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions> | ||
| 11 | + </Filter> | ||
| 12 | + <Filter Include="Resource Files"> | ||
| 13 | + <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> | ||
| 14 | + <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> | ||
| 15 | + </Filter> | ||
| 16 | + </ItemGroup> | ||
| 17 | + <ItemGroup> | ||
| 18 | + <ClInclude Include="NonStreamingSpeechRecognition.h"> | ||
| 19 | + <Filter>Header Files</Filter> | ||
| 20 | + </ClInclude> | ||
| 21 | + <ClInclude Include="NonStreamingSpeechRecognitionDlg.h"> | ||
| 22 | + <Filter>Header Files</Filter> | ||
| 23 | + </ClInclude> | ||
| 24 | + <ClInclude Include="framework.h"> | ||
| 25 | + <Filter>Header Files</Filter> | ||
| 26 | + </ClInclude> | ||
| 27 | + <ClInclude Include="targetver.h"> | ||
| 28 | + <Filter>Header Files</Filter> | ||
| 29 | + </ClInclude> | ||
| 30 | + <ClInclude Include="Resource.h"> | ||
| 31 | + <Filter>Header Files</Filter> | ||
| 32 | + </ClInclude> | ||
| 33 | + <ClInclude Include="pch.h"> | ||
| 34 | + <Filter>Header Files</Filter> | ||
| 35 | + </ClInclude> | ||
| 36 | + </ItemGroup> | ||
| 37 | + <ItemGroup> | ||
| 38 | + <ClCompile Include="NonStreamingSpeechRecognition.cpp"> | ||
| 39 | + <Filter>Source Files</Filter> | ||
| 40 | + </ClCompile> | ||
| 41 | + <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp"> | ||
| 42 | + <Filter>Source Files</Filter> | ||
| 43 | + </ClCompile> | ||
| 44 | + <ClCompile Include="pch.cpp"> | ||
| 45 | + <Filter>Source Files</Filter> | ||
| 46 | + </ClCompile> | ||
| 47 | + </ItemGroup> | ||
| 48 | + <ItemGroup> | ||
| 49 | + <ResourceCompile Include="NonStreamingSpeechRecognition.rc"> | ||
| 50 | + <Filter>Resource Files</Filter> | ||
| 51 | + </ResourceCompile> | ||
| 52 | + </ItemGroup> | ||
| 53 | + <ItemGroup> | ||
| 54 | + <None Include="res\NonStreamingSpeechRecognition.rc2"> | ||
| 55 | + <Filter>Resource Files</Filter> | ||
| 56 | + </None> | ||
| 57 | + </ItemGroup> | ||
| 58 | + <ItemGroup> | ||
| 59 | + <Image Include="res\NonStreamingSpeechRecognition.ico"> | ||
| 60 | + <Filter>Resource Files</Filter> | ||
| 61 | + </Image> | ||
| 62 | + </ItemGroup> | ||
| 63 | +</Project> |
| 1 | + | ||
| 2 | +// NonStreamingSpeechRecognitionDlg.cpp : implementation file | ||
| 3 | +// | ||
| 4 | + | ||
| 5 | +// clang-format off | ||
| 6 | +#include "pch.h" | ||
| 7 | +#include "framework.h" | ||
| 8 | +#include "afxdialogex.h" | ||
| 9 | +#include "NonStreamingSpeechRecognition.h" | ||
| 10 | +#include "NonStreamingSpeechRecognitionDlg.h" | ||
| 11 | +// clang-format on | ||
| 12 | + | ||
| 13 | +#include <fstream> | ||
| 14 | +#include <sstream> | ||
| 15 | +#include <string> | ||
| 16 | +#include <vector> | ||
| 17 | + | ||
| 18 | +#ifdef _DEBUG | ||
| 19 | +#define new DEBUG_NEW | ||
| 20 | +#endif | ||
| 21 | + | ||
| 22 | +Microphone::Microphone() { | ||
| 23 | + PaError err = Pa_Initialize(); | ||
| 24 | + if (err != paNoError) { | ||
| 25 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 26 | + exit(-2); | ||
| 27 | + } | ||
| 28 | +} | ||
| 29 | + | ||
| 30 | +Microphone::~Microphone() { | ||
| 31 | + PaError err = Pa_Terminate(); | ||
| 32 | + if (err != paNoError) { | ||
| 33 | + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err)); | ||
| 34 | + exit(-2); | ||
| 35 | + } | ||
| 36 | +} | ||
| 37 | + | ||
// Converts a UTF-8 encoded string to UTF-16.
//
// Code points above U+FFFF are emitted as a surrogate pair.
//
// Throws std::logic_error("not a UTF-8 string") if the input is malformed:
// a stray continuation byte, an invalid lead byte, a truncated sequence, an
// overlong encoding, an encoded surrogate (U+D800..U+DFFF) or a value above
// U+10FFFF.
//
// Based on
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  static const char kNotUtf8[] = "not a UTF-8 string";

  std::wstring utf16;
  utf16.reserve(utf8.size());  // UTF-16 never needs more units than UTF-8 bytes

  size_t i = 0;
  while (i < utf8.size()) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i++]);

    unsigned long uni = 0;        // code point being decoded
    size_t todo = 0;              // number of continuation bytes expected
    unsigned long min_value = 0;  // smallest code point allowed for this length

    if (lead <= 0x7F) {
      uni = lead;
    } else if (lead <= 0xBF) {
      // A continuation byte cannot start a sequence.
      throw std::logic_error(kNotUtf8);
    } else if (lead <= 0xDF) {
      uni = lead & 0x1F;
      todo = 1;
      min_value = 0x80;
    } else if (lead <= 0xEF) {
      uni = lead & 0x0F;
      todo = 2;
      min_value = 0x800;
    } else if (lead <= 0xF7) {
      uni = lead & 0x07;
      todo = 3;
      min_value = 0x10000;
    } else {
      throw std::logic_error(kNotUtf8);
    }

    for (size_t j = 0; j < todo; ++j) {
      if (i == utf8.size()) throw std::logic_error(kNotUtf8);
      const unsigned char cont = static_cast<unsigned char>(utf8[i++]);
      if (cont < 0x80 || cont > 0xBF) throw std::logic_error(kNotUtf8);
      uni = (uni << 6) + (cont & 0x3F);
    }

    // Reject overlong encodings, i.e. a value that would have fit into a
    // shorter sequence (for example C0 80 for U+0000).
    if (uni < min_value) throw std::logic_error(kNotUtf8);
    if (uni >= 0xD800 && uni <= 0xDFFF) throw std::logic_error(kNotUtf8);
    if (uni > 0x10FFFF) throw std::logic_error(kNotUtf8);

    if (uni <= 0xFFFF) {
      utf16 += static_cast<wchar_t>(uni);
    } else {
      uni -= 0x10000;
      utf16 += static_cast<wchar_t>((uni >> 10) + 0xD800);
      utf16 += static_cast<wchar_t>((uni & 0x3FF) + 0xDC00);
    }
  }

  return utf16;
}
| 90 | + | ||
// Formats `results` as a numbered list.
//
// Each entry is written as "<index>: <text>", with indices starting at 0 and
// entries separated by "\r\n". There is no trailing separator, and an empty
// input yields an empty string.
static std::string Cat(const std::vector<std::string> &results) {
  std::ostringstream os;
  const char *sep = "";

  // size_t index: avoids comparing a signed int against results.size().
  for (size_t i = 0; i != results.size(); ++i) {
    os << sep << i << ": " << results[i];
    sep = "\r\n";
  }

  return os.str();
}
| 103 | + | ||
| 104 | +// CNonStreamingSpeechRecognitionDlg dialog | ||
| 105 | + | ||
| 106 | +CNonStreamingSpeechRecognitionDlg::CNonStreamingSpeechRecognitionDlg( | ||
| 107 | + CWnd *pParent /*=nullptr*/) | ||
| 108 | + : CDialogEx(IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG, pParent) { | ||
| 109 | + m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME); | ||
| 110 | +} | ||
| 111 | + | ||
| 112 | +CNonStreamingSpeechRecognitionDlg::~CNonStreamingSpeechRecognitionDlg() { | ||
| 113 | + if (recognizer_) { | ||
| 114 | + DestroyOfflineRecognizer(recognizer_); | ||
| 115 | + recognizer_ = nullptr; | ||
| 116 | + } | ||
| 117 | +} | ||
| 118 | + | ||
| 119 | +void CNonStreamingSpeechRecognitionDlg::DoDataExchange(CDataExchange *pDX) { | ||
| 120 | + CDialogEx::DoDataExchange(pDX); | ||
| 121 | + DDX_Control(pDX, IDC_EDIT1, my_text_); | ||
| 122 | + DDX_Control(pDX, IDOK, my_btn_); | ||
| 123 | +} | ||
| 124 | + | ||
| 125 | +BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionDlg, CDialogEx) | ||
| 126 | +ON_WM_PAINT() | ||
| 127 | +ON_WM_QUERYDRAGICON() | ||
| 128 | +ON_BN_CLICKED(IDOK, &CNonStreamingSpeechRecognitionDlg::OnBnClickedOk) | ||
| 129 | +END_MESSAGE_MAP() | ||
| 130 | + | ||
| 131 | +// CNonStreamingSpeechRecognitionDlg message handlers | ||
| 132 | + | ||
| 133 | +BOOL CNonStreamingSpeechRecognitionDlg::OnInitDialog() { | ||
| 134 | + CDialogEx::OnInitDialog(); | ||
| 135 | + | ||
| 136 | + // Set the icon for this dialog. The framework does this automatically | ||
| 137 | + // when the application's main window is not a dialog | ||
| 138 | + SetIcon(m_hIcon, TRUE); // Set big icon | ||
| 139 | + SetIcon(m_hIcon, FALSE); // Set small icon | ||
| 140 | + | ||
| 141 | + // TODO: Add extra initialization here | ||
| 142 | + InitMicrophone(); | ||
| 143 | + | ||
| 144 | + return TRUE; // return TRUE unless you set the focus to a control | ||
| 145 | +} | ||
| 146 | + | ||
| 147 | +// If you add a minimize button to your dialog, you will need the code below | ||
| 148 | +// to draw the icon. For MFC applications using the document/view model, | ||
| 149 | +// this is automatically done for you by the framework. | ||
| 150 | + | ||
| 151 | +void CNonStreamingSpeechRecognitionDlg::OnPaint() { | ||
| 152 | + if (IsIconic()) { | ||
| 153 | + CPaintDC dc(this); // device context for painting | ||
| 154 | + | ||
| 155 | + SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()), | ||
| 156 | + 0); | ||
| 157 | + | ||
| 158 | + // Center icon in client rectangle | ||
| 159 | + int cxIcon = GetSystemMetrics(SM_CXICON); | ||
| 160 | + int cyIcon = GetSystemMetrics(SM_CYICON); | ||
| 161 | + CRect rect; | ||
| 162 | + GetClientRect(&rect); | ||
| 163 | + int x = (rect.Width() - cxIcon + 1) / 2; | ||
| 164 | + int y = (rect.Height() - cyIcon + 1) / 2; | ||
| 165 | + | ||
| 166 | + // Draw the icon | ||
| 167 | + dc.DrawIcon(x, y, m_hIcon); | ||
| 168 | + } else { | ||
| 169 | + CDialogEx::OnPaint(); | ||
| 170 | + } | ||
| 171 | +} | ||
| 172 | + | ||
| 173 | +// The system calls this function to obtain the cursor to display while the user | ||
| 174 | +// drags | ||
| 175 | +// the minimized window. | ||
| 176 | +HCURSOR CNonStreamingSpeechRecognitionDlg::OnQueryDragIcon() { | ||
| 177 | + return static_cast<HCURSOR>(m_hIcon); | ||
| 178 | +} | ||
| 179 | + | ||
| 180 | +static int32_t RecordCallback(const void *input_buffer, | ||
| 181 | + void * /*output_buffer*/, | ||
| 182 | + unsigned long frames_per_buffer, // NOLINT | ||
| 183 | + const PaStreamCallbackTimeInfo * /*time_info*/, | ||
| 184 | + PaStreamCallbackFlags /*status_flags*/, | ||
| 185 | + void *user_data) { | ||
| 186 | + auto dlg = reinterpret_cast<CNonStreamingSpeechRecognitionDlg *>(user_data); | ||
| 187 | + auto begin = reinterpret_cast<const float *>(input_buffer); | ||
| 188 | + auto end = begin + frames_per_buffer; | ||
| 189 | + dlg->samples_.insert(dlg->samples_.end(), begin, end); | ||
| 190 | + | ||
| 191 | + return dlg->started_ ? paContinue : paComplete; | ||
| 192 | +} | ||
| 193 | + | ||
| 194 | +void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() { | ||
| 195 | + if (!recognizer_) { | ||
| 196 | + AppendLineToMultilineEditCtrl("Creating recognizer..."); | ||
| 197 | + AppendLineToMultilineEditCtrl("It will take several seconds. Please wait"); | ||
| 198 | + InitRecognizer(); | ||
| 199 | + if (!recognizer_) { | ||
| 200 | + // failed to create the recognizer | ||
| 201 | + return; | ||
| 202 | + } | ||
| 203 | + AppendLineToMultilineEditCtrl("Recognizer created!"); | ||
| 204 | + } | ||
| 205 | + | ||
| 206 | + if (!started_) { | ||
| 207 | + samples_.clear(); | ||
| 208 | + started_ = true; | ||
| 209 | + | ||
| 210 | + PaStreamParameters param; | ||
| 211 | + param.device = Pa_GetDefaultInputDevice(); | ||
| 212 | + const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device); | ||
| 213 | + param.channelCount = 1; | ||
| 214 | + param.sampleFormat = paFloat32; | ||
| 215 | + param.suggestedLatency = info->defaultLowInputLatency; | ||
| 216 | + param.hostApiSpecificStreamInfo = nullptr; | ||
| 217 | + float sample_rate = config_.feat_config.sample_rate; | ||
| 218 | + pa_stream_ = nullptr; | ||
| 219 | + PaError err = | ||
| 220 | + Pa_OpenStream(&pa_stream_, ¶m, nullptr, /* &outputParameters, */ | ||
| 221 | + sample_rate, | ||
| 222 | + 0, // frames per buffer | ||
| 223 | + paClipOff, // we won't output out of range samples | ||
| 224 | + // so don't bother clipping them | ||
| 225 | + RecordCallback, this); | ||
| 226 | + if (err != paNoError) { | ||
| 227 | + AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") + | ||
| 228 | + Pa_GetErrorText(err)); | ||
| 229 | + my_btn_.EnableWindow(FALSE); | ||
| 230 | + return; | ||
| 231 | + } | ||
| 232 | + | ||
| 233 | + err = Pa_StartStream(pa_stream_); | ||
| 234 | + if (err != paNoError) { | ||
| 235 | + AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") + | ||
| 236 | + Pa_GetErrorText(err)); | ||
| 237 | + my_btn_.EnableWindow(FALSE); | ||
| 238 | + return; | ||
| 239 | + } | ||
| 240 | + AppendLineToMultilineEditCtrl( | ||
| 241 | + "\r\nStarted! Please speak and click stop.\r\n"); | ||
| 242 | + my_btn_.SetWindowText(_T("Stop")); | ||
| 243 | + | ||
| 244 | + } else { | ||
| 245 | + started_ = false; | ||
| 246 | + | ||
| 247 | + Pa_Sleep(200); // sleep for 200ms | ||
| 248 | + if (pa_stream_) { | ||
| 249 | + PaError err = Pa_CloseStream(pa_stream_); | ||
| 250 | + if (err != paNoError) { | ||
| 251 | + AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") + | ||
| 252 | + Pa_GetErrorText(err)); | ||
| 253 | + my_btn_.EnableWindow(FALSE); | ||
| 254 | + return; | ||
| 255 | + } | ||
| 256 | + } | ||
| 257 | + pa_stream_ = nullptr; | ||
| 258 | + | ||
| 259 | + SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer_); | ||
| 260 | + | ||
| 261 | + AcceptWaveformOffline(stream, config_.feat_config.sample_rate, | ||
| 262 | + samples_.data(), samples_.size()); | ||
| 263 | + DecodeOfflineStream(recognizer_, stream); | ||
| 264 | + SherpaOnnxOfflineRecognizerResult *r = GetOfflineStreamResult(stream); | ||
| 265 | + results_.emplace_back(r->text); | ||
| 266 | + | ||
| 267 | + auto str = Utf8ToUtf16(Cat(results_).c_str()); | ||
| 268 | + my_text_.SetWindowText(str.c_str()); | ||
| 269 | + my_text_.SetFocus(); | ||
| 270 | + my_text_.SetSel(-1); | ||
| 271 | + | ||
| 272 | + DestroyOfflineRecognizerResult(r); | ||
| 273 | + | ||
| 274 | + DestroyOfflineStream(stream); | ||
| 275 | + // AfxMessageBox("Stopped", MB_OK); | ||
| 276 | + my_btn_.SetWindowText(_T("Start")); | ||
| 277 | + AppendLineToMultilineEditCtrl("\r\nStopped. Please click start and speak"); | ||
| 278 | + } | ||
| 279 | +} | ||
| 280 | + | ||
| 281 | +void CNonStreamingSpeechRecognitionDlg::InitMicrophone() { | ||
| 282 | + int default_device = Pa_GetDefaultInputDevice(); | ||
| 283 | + int device_count = Pa_GetDeviceCount(); | ||
| 284 | + if (default_device == paNoDevice) { | ||
| 285 | + // CString str; | ||
| 286 | + // str.Format(_T("No default input device found!")); | ||
| 287 | + // AfxMessageBox(str, MB_OK | MB_ICONSTOP); | ||
| 288 | + // exit(-1); | ||
| 289 | + AppendLineToMultilineEditCtrl("No default input device found!"); | ||
| 290 | + my_btn_.EnableWindow(FALSE); | ||
| 291 | + return; | ||
| 292 | + } | ||
| 293 | + AppendLineToMultilineEditCtrl(std::string("Selected device ") + | ||
| 294 | + Pa_GetDeviceInfo(default_device)->name); | ||
| 295 | +} | ||
| 296 | + | ||
| 297 | +bool CNonStreamingSpeechRecognitionDlg::Exists(const std::string &filename) { | ||
| 298 | + std::ifstream is(filename); | ||
| 299 | + return is.good(); | ||
| 300 | +} | ||
| 301 | + | ||
| 302 | +void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() { | ||
| 303 | + my_btn_.EnableWindow(FALSE); | ||
| 304 | + std::string msg = | ||
| 305 | + "\r\nPlease go to\r\n" | ||
| 306 | + "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html " | ||
| 307 | + "\r\n"; | ||
| 308 | + msg += "to download a non-streaming model, i.e., an offline model.\r\n"; | ||
| 309 | + msg += | ||
| 310 | + "You need to rename them to encoder.onnx, decoder.onnx, and " | ||
| 311 | + "joiner.onnx correspoondingly.\r\n\r\n"; | ||
| 312 | + msg += "It supports both transducer models and paraformer models.\r\n\r\n"; | ||
| 313 | + msg += | ||
| 314 | + "We give two examples below to show you how to download models\r\n\r\n"; | ||
| 315 | + msg += "(1) Transducer\r\n\r\n"; | ||
| 316 | + msg += | ||
| 317 | + "We use " | ||
| 318 | + "https://huggingface.co/pkufool/" | ||
| 319 | + "icefall-asr-zipformer-wenetspeech-20230615 below\r\n"; | ||
| 320 | + msg += | ||
| 321 | + "wget " | ||
| 322 | + "https://huggingface.co/pkufool/" | ||
| 323 | + "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/" | ||
| 324 | + "encoder-epoch-12-avg-4.onnx\r\n"; | ||
| 325 | + msg += | ||
| 326 | + "wget " | ||
| 327 | + "https://huggingface.co/pkufool/" | ||
| 328 | + "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/" | ||
| 329 | + "decoder-epoch-12-avg-4.onnx\r\n"; | ||
| 330 | + msg += | ||
| 331 | + "wget " | ||
| 332 | + "https://huggingface.co/pkufool/" | ||
| 333 | + "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/" | ||
| 334 | + "joiner-epoch-12-avg-4.onnx\r\n"; | ||
| 335 | + msg += "\r\n Now rename them\r\n"; | ||
| 336 | + msg += "mv encoder-epoch-12-avg-4.onnx encoder.onnx\r\n"; | ||
| 337 | + msg += "mv decoder-epoch-12-avg-4.onnx decoder.onnx\r\n"; | ||
| 338 | + msg += "mv joiner-epoch-12-avg-4.onnx joiner.onnx\r\n\r\n"; | ||
| 339 | + msg += "(2) Paraformer\r\n\r\n"; | ||
| 340 | + msg += | ||
| 341 | + "wget " | ||
| 342 | + "https://huggingface.co/csukuangfj/" | ||
| 343 | + "sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/model.onnx\r\n"; | ||
| 344 | + msg += | ||
| 345 | + "wget " | ||
| 346 | + "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/" | ||
| 347 | + "resolve/main/tokens.txt\r\n\r\n"; | ||
| 348 | + msg += "\r\n Now rename them\r\n"; | ||
| 349 | + msg += "mv model.onnx paraformer.onnx\r\n"; | ||
| 350 | + msg += "\r\n"; | ||
| 351 | + msg += "That's it!\r\n"; | ||
| 352 | + | ||
| 353 | + AppendLineToMultilineEditCtrl(msg); | ||
| 354 | +} | ||
| 355 | + | ||
| 356 | +void CNonStreamingSpeechRecognitionDlg::InitParaformer() { | ||
| 357 | + std::string paraformer = "./paraformer.onnx"; | ||
| 358 | + std::string tokens = "./tokens.txt"; | ||
| 359 | + | ||
| 360 | + bool is_ok = true; | ||
| 361 | + | ||
| 362 | + if (Exists("./paraformer.int8.onnx")) { | ||
| 363 | + paraformer = "./paraformer.int8.onnx"; | ||
| 364 | + } else if (!Exists(paraformer)) { | ||
| 365 | + std::string msg = paraformer + " does not exist!"; | ||
| 366 | + AppendLineToMultilineEditCtrl(msg); | ||
| 367 | + is_ok = false; | ||
| 368 | + } | ||
| 369 | + | ||
| 370 | + if (!Exists(tokens)) { | ||
| 371 | + std::string msg = tokens + " does not exist!"; | ||
| 372 | + AppendLineToMultilineEditCtrl(msg); | ||
| 373 | + is_ok = false; | ||
| 374 | + } | ||
| 375 | + | ||
| 376 | + if (!is_ok) { | ||
| 377 | + ShowInitRecognizerHelpMessage(); | ||
| 378 | + return; | ||
| 379 | + } | ||
| 380 | + | ||
| 381 | + memset(&config_, 0, sizeof(config_)); | ||
| 382 | + | ||
| 383 | + config_.feat_config.sample_rate = 16000; | ||
| 384 | + config_.feat_config.feature_dim = 80; | ||
| 385 | + | ||
| 386 | + config_.model_config.paraformer.model = paraformer.c_str(); | ||
| 387 | + config_.model_config.tokens = tokens.c_str(); | ||
| 388 | + config_.model_config.num_threads = 1; | ||
| 389 | + config_.model_config.debug = 1; | ||
| 390 | + | ||
| 391 | + config_.decoding_method = "greedy_search"; | ||
| 392 | + config_.max_active_paths = 4; | ||
| 393 | + | ||
| 394 | + recognizer_ = CreateOfflineRecognizer(&config_); | ||
| 395 | +} | ||
| 396 | + | ||
| 397 | +void CNonStreamingSpeechRecognitionDlg::InitRecognizer() { | ||
| 398 | + if (Exists("./paraformer.onnx") || Exists("./paraformer.int8.onnx")) { | ||
| 399 | + InitParaformer(); | ||
| 400 | + return; | ||
| 401 | + } | ||
| 402 | + | ||
| 403 | + // assume it is transducer | ||
| 404 | + | ||
| 405 | + std::string encoder = "./encoder.onnx"; | ||
| 406 | + std::string decoder = "./decoder.onnx"; | ||
| 407 | + std::string joiner = "./joiner.onnx"; | ||
| 408 | + std::string tokens = "./tokens.txt"; | ||
| 409 | + | ||
| 410 | + bool is_ok = true; | ||
| 411 | + if (!Exists(encoder)) { | ||
| 412 | + std::string msg = encoder + " does not exist!"; | ||
| 413 | + AppendLineToMultilineEditCtrl(msg); | ||
| 414 | + is_ok = false; | ||
| 415 | + } | ||
| 416 | + | ||
| 417 | + if (!Exists(decoder)) { | ||
| 418 | + std::string msg = decoder + " does not exist!"; | ||
| 419 | + AppendLineToMultilineEditCtrl(msg); | ||
| 420 | + is_ok = false; | ||
| 421 | + } | ||
| 422 | + | ||
| 423 | + if (!Exists(joiner)) { | ||
| 424 | + std::string msg = joiner + " does not exist!"; | ||
| 425 | + AppendLineToMultilineEditCtrl(msg); | ||
| 426 | + is_ok = false; | ||
| 427 | + } | ||
| 428 | + | ||
| 429 | + if (!Exists(tokens)) { | ||
| 430 | + std::string msg = tokens + " does not exist!"; | ||
| 431 | + AppendLineToMultilineEditCtrl(msg); | ||
| 432 | + is_ok = false; | ||
| 433 | + } | ||
| 434 | + | ||
| 435 | + if (!is_ok) { | ||
| 436 | + ShowInitRecognizerHelpMessage(); | ||
| 437 | + return; | ||
| 438 | + } | ||
| 439 | + memset(&config_, 0, sizeof(config_)); | ||
| 440 | + | ||
| 441 | + config_.feat_config.sample_rate = 16000; | ||
| 442 | + config_.feat_config.feature_dim = 80; | ||
| 443 | + | ||
| 444 | + config_.model_config.transducer.encoder = encoder.c_str(); | ||
| 445 | + config_.model_config.transducer.decoder = decoder.c_str(); | ||
| 446 | + config_.model_config.transducer.joiner = joiner.c_str(); | ||
| 447 | + config_.model_config.tokens = tokens.c_str(); | ||
| 448 | + config_.model_config.num_threads = 1; | ||
| 449 | + config_.model_config.debug = 0; | ||
| 450 | + | ||
| 451 | + config_.decoding_method = "greedy_search"; | ||
| 452 | + config_.max_active_paths = 4; | ||
| 453 | + | ||
| 454 | + recognizer_ = CreateOfflineRecognizer(&config_); | ||
| 455 | +} | ||
| 456 | + | ||
| 457 | +void CNonStreamingSpeechRecognitionDlg::AppendTextToEditCtrl( | ||
| 458 | + const std::string &s) { | ||
| 459 | + // get the initial text length | ||
| 460 | + int nLength = my_text_.GetWindowTextLength(); | ||
| 461 | + // put the selection at the end of text | ||
| 462 | + my_text_.SetSel(nLength, nLength); | ||
| 463 | + // replace the selection | ||
| 464 | + | ||
| 465 | + std::wstring wstr = Utf8ToUtf16(s); | ||
| 466 | + | ||
| 467 | + my_text_.ReplaceSel(wstr.c_str()); | ||
| 468 | +} | ||
| 469 | + | ||
| 470 | +void CNonStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl( | ||
| 471 | + const std::string &s) { | ||
| 472 | + AppendTextToEditCtrl("\r\n" + s); | ||
| 473 | +} |
| 1 | + | ||
| 2 | +// NonStreamingSpeechRecognitionDlg.h : header file | ||
| 3 | +// | ||
| 4 | + | ||
| 5 | +#pragma once | ||
| 6 | + | ||
#include <atomic>
#include <string>
#include <vector>

#include "portaudio.h"
#include "sherpa-onnx/c-api/c-api.h"
| 12 | + | ||
// Helper object held by the dialog as the member mic_.
//
// NOTE(review): the constructor and destructor are defined elsewhere;
// presumably they initialize and terminate PortAudio -- confirm against the
// implementation. Because the class declares its own destructor, copying is
// disabled so that the destructor cannot run twice for one acquisition.
class Microphone {
 public:
  Microphone();
  ~Microphone();

  Microphone(const Microphone &) = delete;
  Microphone &operator=(const Microphone &) = delete;
};
| 18 | + | ||
| 19 | +// CNonStreamingSpeechRecognitionDlg dialog | ||
| 20 | +class CNonStreamingSpeechRecognitionDlg : public CDialogEx { | ||
| 21 | + // Construction | ||
| 22 | + public: | ||
| 23 | + CNonStreamingSpeechRecognitionDlg( | ||
| 24 | + CWnd *pParent = nullptr); // standard constructor | ||
| 25 | + ~CNonStreamingSpeechRecognitionDlg(); | ||
| 26 | + | ||
| 27 | +// Dialog Data | ||
| 28 | +#ifdef AFX_DESIGN_TIME | ||
| 29 | + enum { IDD = IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG }; | ||
| 30 | +#endif | ||
| 31 | + | ||
| 32 | + protected: | ||
| 33 | + virtual void DoDataExchange(CDataExchange *pDX); // DDX/DDV support | ||
| 34 | + | ||
| 35 | + // Implementation | ||
| 36 | + protected: | ||
| 37 | + HICON m_hIcon; | ||
| 38 | + | ||
| 39 | + // Generated message map functions | ||
| 40 | + virtual BOOL OnInitDialog(); | ||
| 41 | + afx_msg void OnPaint(); | ||
| 42 | + afx_msg HCURSOR OnQueryDragIcon(); | ||
| 43 | + DECLARE_MESSAGE_MAP() | ||
| 44 | + public: | ||
| 45 | + afx_msg void OnBnClickedOk(); | ||
| 46 | + int RunThread(); | ||
| 47 | + | ||
| 48 | + private: | ||
| 49 | + Microphone mic_; | ||
| 50 | + | ||
| 51 | + SherpaOnnxOfflineRecognizer *recognizer_ = nullptr; | ||
| 52 | + SherpaOnnxOfflineRecognizerConfig config_; | ||
| 53 | + | ||
| 54 | + PaStream *pa_stream_ = nullptr; | ||
| 55 | + CButton my_btn_; | ||
| 56 | + CEdit my_text_; | ||
| 57 | + std::vector<std::string> results_; | ||
| 58 | + | ||
| 59 | + public: | ||
| 60 | + bool started_ = false; | ||
| 61 | + std::vector<float> samples_; | ||
| 62 | + | ||
| 63 | + private: | ||
| 64 | + void AppendTextToEditCtrl(const std::string &s); | ||
| 65 | + void AppendLineToMultilineEditCtrl(const std::string &s); | ||
| 66 | + void InitMicrophone(); | ||
| 67 | + | ||
| 68 | + bool Exists(const std::string &filename); | ||
| 69 | + void InitRecognizer(); | ||
| 70 | + | ||
| 71 | + void InitParaformer(); | ||
| 72 | + void ShowInitRecognizerHelpMessage(); | ||
| 73 | +}; |
| 1 | +//{{NO_DEPENDENCIES}} | ||
| 2 | +// Microsoft Visual C++ generated include file. | ||
| 3 | +// Used by NonStreamingSpeechRecognition.rc | ||
| 4 | +// | ||
| 5 | +#define IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG 102 | ||
| 6 | +#define IDR_MAINFRAME 128 | ||
| 7 | +#define IDC_EDIT1 1000 | ||
| 8 | + | ||
| 9 | +// Next default values for new objects | ||
| 10 | +// | ||
| 11 | +#ifdef APSTUDIO_INVOKED | ||
| 12 | +#ifndef APSTUDIO_READONLY_SYMBOLS | ||
| 13 | +#define _APS_NEXT_RESOURCE_VALUE 130 | ||
| 14 | +#define _APS_NEXT_COMMAND_VALUE 32771 | ||
| 15 | +#define _APS_NEXT_CONTROL_VALUE 1001 | ||
| 16 | +#define _APS_NEXT_SYMED_VALUE 101 | ||
| 17 | +#endif | ||
| 18 | +#endif |
| 1 | +#pragma once | ||
| 2 | + | ||
| 3 | +#ifndef VC_EXTRALEAN | ||
| 4 | +#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers | ||
| 5 | +#endif | ||
| 6 | + | ||
| 7 | +#include "targetver.h" | ||
| 8 | + | ||
| 9 | +#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be | ||
| 10 | + // explicit | ||
| 11 | + | ||
| 12 | +// turns off MFC's hiding of some common and often safely ignored warning | ||
| 13 | +// messages | ||
| 14 | +#define _AFX_ALL_WARNINGS | ||
| 15 | + | ||
| 16 | +#include <afxext.h> // MFC extensions | ||
| 17 | +#include <afxwin.h> // MFC core and standard components | ||
| 18 | + | ||
| 19 | +#ifndef _AFX_NO_OLE_SUPPORT | ||
| 20 | +#include <afxdtctl.h> // MFC support for Internet Explorer 4 Common Controls | ||
| 21 | +#endif | ||
| 22 | +#ifndef _AFX_NO_AFXCMN_SUPPORT | ||
| 23 | +#include <afxcmn.h> // MFC support for Windows Common Controls | ||
| 24 | +#endif // _AFX_NO_AFXCMN_SUPPORT | ||
| 25 | + | ||
| 26 | +#include <afxcontrolbars.h> // MFC support for ribbons and control bars |
| 1 | +// pch.h: This is a precompiled header file. | ||
| 2 | +// Files listed below are compiled only once, improving build performance for | ||
| 3 | +// future builds. This also affects IntelliSense performance, including code | ||
| 4 | +// completion and many code browsing features. However, files listed here are | ||
| 5 | +// ALL re-compiled if any one of them is updated between builds. Do not add | ||
| 6 | +// files here that you will be updating frequently as this negates the | ||
| 7 | +// performance advantage. | ||
| 8 | + | ||
| 9 | +#ifndef PCH_H | ||
| 10 | +#define PCH_H | ||
| 11 | + | ||
| 12 | +// add headers that you want to pre-compile here | ||
| 13 | +#include "framework.h" | ||
| 14 | + | ||
| 15 | +#endif // PCH_H |
不能预览此文件类型
| 1 | +<?xml version="1.0" encoding="utf-8"?> | ||
| 2 | +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
| 3 | + <ImportGroup Label="PropertySheets" /> | ||
| 4 | + <PropertyGroup Label="UserMacros" /> | ||
| 5 | + <PropertyGroup> | ||
| 6 | + <SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory> | ||
| 7 | + <SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory> | ||
| 8 | + <SherpaOnnxLibraries> | ||
| 9 | + sherpa-onnx-portaudio_static.lib; | ||
| 10 | + sherpa-onnx-c-api.lib; | ||
| 11 | + sherpa-onnx-core.lib; | ||
| 12 | + kaldi-native-fbank-core.lib; | ||
| 13 | + absl_base.lib; | ||
| 14 | + absl_city.lib; | ||
| 15 | + absl_hash.lib; | ||
| 16 | + absl_low_level_hash.lib; | ||
| 17 | + absl_raw_hash_set.lib; | ||
| 18 | + absl_raw_logging_internal.lib; | ||
| 19 | + absl_throw_delegate.lib; | ||
| 20 | + clog.lib; | ||
| 21 | + cpuinfo.lib; | ||
| 22 | + flatbuffers.lib; | ||
| 23 | + libprotobuf-lite.lib; | ||
| 24 | + onnx.lib; | ||
| 25 | + onnx_proto.lib; | ||
| 26 | + onnxruntime_common.lib; | ||
| 27 | + onnxruntime_flatbuffers.lib; | ||
| 28 | + onnxruntime_framework.lib; | ||
| 29 | + onnxruntime_graph.lib; | ||
| 30 | + onnxruntime_mlas.lib; | ||
| 31 | + onnxruntime_optimizer.lib; | ||
| 32 | + onnxruntime_providers.lib; | ||
| 33 | + onnxruntime_session.lib; | ||
| 34 | + onnxruntime_util.lib; | ||
| 35 | + re2.lib; | ||
| 36 | + </SherpaOnnxLibraries> | ||
| 37 | + </PropertyGroup> | ||
| 38 | + <ItemDefinitionGroup> | ||
| 39 | + <ClCompile> | ||
| 40 | + <AdditionalIncludeDirectories> | ||
| 41 | + $(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include; | ||
| 42 | + $(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
| 43 | + </ClCompile> | ||
| 44 | + <Link> | ||
| 45 | + <AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | ||
| 46 | + <AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies> | ||
| 47 | + </Link> | ||
| 48 | + </ItemDefinitionGroup> | ||
| 49 | + <ItemGroup /> | ||
| 50 | +</Project> |
| 1 | +#pragma once | ||
| 2 | + | ||
| 3 | +// Including SDKDDKVer.h defines the highest available Windows platform. | ||
| 4 | + | ||
| 5 | +// If you wish to build your application for a previous Windows platform, | ||
| 6 | +// include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish | ||
| 7 | +// to support before including SDKDDKVer.h. | ||
| 8 | + | ||
| 9 | +#include <SDKDDKVer.h> |
| @@ -3,11 +3,19 @@ | @@ -3,11 +3,19 @@ | ||
| 3 | This directory contains examples showing how to use Next-gen Kaldi in MFC | 3 | This directory contains examples showing how to use Next-gen Kaldi in MFC |
| 4 | for speech recognition. | 4 | for speech recognition. |
| 5 | 5 | ||
| 6 | -Caution: You need to use Windows and install Visual Studio in order to run it. | 6 | +Caution: You need to use Windows and install Visual Studio 2022 in order to |
| 7 | +compile it. | ||
| 8 | + | ||
| 9 | +Hint: If you don't want to install Visual Studio, see below | ||
| 10 | +for how to download a pre-compiled `exe`. | ||
| 11 | + | ||
| 7 | We use bash script below to demonstrate how to use it. Please change | 12 | We use bash script below to demonstrate how to use it. Please change |
| 8 | the commands accordingly for Windows. | 13 | the commands accordingly for Windows. |
| 9 | 14 | ||
| 10 | -## Streaming speech recognition | 15 | +## How to compile |
| 16 | + | ||
| 17 | + | ||
| 18 | +First, we need to compile sherpa-onnx: | ||
| 11 | 19 | ||
| 12 | ```bash | 20 | ```bash |
| 13 | mkdir -p $HOME/open-source | 21 | mkdir -p $HOME/open-source |
| @@ -19,7 +27,6 @@ mkdir build | @@ -19,7 +27,6 @@ mkdir build | ||
| 19 | 27 | ||
| 20 | cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install .. | 28 | cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install .. |
| 21 | cmake --build . --config Release --target install | 29 | cmake --build . --config Release --target install |
| 22 | - | ||
| 23 | cd ../mfc-examples | 30 | cd ../mfc-examples |
| 24 | 31 | ||
| 25 | msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64 | 32 | msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64 |
| @@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6 | @@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6 | ||
| 27 | # now run the program | 34 | # now run the program |
| 28 | 35 | ||
| 29 | ./x64/Release/StreamingSpeechRecognition.exe | 36 | ./x64/Release/StreamingSpeechRecognition.exe |
| 37 | +./x64/Release/NonStreamingSpeechRecognition.exe | ||
| 30 | ``` | 38 | ``` |
| 31 | 39 | ||
| 32 | -Note that we also need to download pre-trained models. Please | ||
| 33 | -refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html | ||
| 34 | -for a list of streaming models. | 40 | +If you don't want to compile the project by yourself, you can download |
| 41 | +pre-compiled `exe` from https://github.com/k2-fsa/sherpa-onnx/releases | ||
| 35 | 42 | ||
| 36 | -We use the following model for demonstration. | 43 | +For instance, you can use the following addresses: |
| 37 | 44 | ||
| 38 | -```bash | ||
| 39 | -cd $HOME/open-source/sherpa-onnx/mfc-examples/x64/Release | ||
| 40 | -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx | ||
| 41 | -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx | ||
| 42 | -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx | ||
| 43 | -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt | ||
| 44 | - | ||
| 45 | -# now rename | ||
| 46 | -mv encoder-epoch-12-avg-4-chunk-16-left-128.onnx encoder.onnx | ||
| 47 | -mv decoder-epoch-12-avg-4-chunk-16-left-128.onnx decoder.onnx | ||
| 48 | -mv joiner-epoch-12-avg-4-chunk-16-left-128.onnx joiner.onnx | ||
| 49 | - | ||
| 50 | -# Now run it! | ||
| 51 | -./StreamingSpeechRecognition.exe | ||
| 52 | -``` | 45 | + - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-streaming-v1.5.1.exe |
| 46 | + - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-non-streaming-v1.5.1.exe |
| @@ -3,12 +3,14 @@ | @@ -3,12 +3,14 @@ | ||
| 3 | // application. | 3 | // application. |
| 4 | // | 4 | // |
| 5 | 5 | ||
| 6 | +// clang-format off | ||
| 6 | #include "pch.h" | 7 | #include "pch.h" |
| 7 | #include "framework.h" | 8 | #include "framework.h" |
| 9 | +// clang-format on | ||
| 8 | 10 | ||
| 9 | #include "StreamingSpeechRecognition.h" | 11 | #include "StreamingSpeechRecognition.h" |
| 10 | -#include "StreamingSpeechRecognitionDlg.h" | ||
| 11 | 12 | ||
| 13 | +#include "StreamingSpeechRecognitionDlg.h" | ||
| 12 | 14 | ||
| 13 | #ifdef _DEBUG | 15 | #ifdef _DEBUG |
| 14 | #define new DEBUG_NEW | 16 | #define new DEBUG_NEW |
| 1 | 1 | ||
| 2 | // StreamingSpeechRecognitionDlg.cpp : implementation file | 2 | // StreamingSpeechRecognitionDlg.cpp : implementation file |
| 3 | // | 3 | // |
| 4 | +// clang-format off | ||
| 4 | #include "pch.h" | 5 | #include "pch.h" |
| 5 | #include "framework.h" | 6 | #include "framework.h" |
| 6 | #include "afxdialogex.h" | 7 | #include "afxdialogex.h" |
| 7 | - | 8 | +// clang-format on |
| 8 | 9 | ||
| 9 | #include "StreamingSpeechRecognitionDlg.h" | 10 | #include "StreamingSpeechRecognitionDlg.h" |
| 10 | 11 | ||
| @@ -15,7 +16,6 @@ | @@ -15,7 +16,6 @@ | ||
| 15 | 16 | ||
| 16 | #include "StreamingSpeechRecognition.h" | 17 | #include "StreamingSpeechRecognition.h" |
| 17 | 18 | ||
| 18 | - | ||
| 19 | #ifdef _DEBUG | 19 | #ifdef _DEBUG |
| 20 | #define new DEBUG_NEW | 20 | #define new DEBUG_NEW |
| 21 | #endif | 21 | #endif |
| @@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() { | @@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() { | ||
| 223 | // exit(-1); | 223 | // exit(-1); |
| 224 | AppendLineToMultilineEditCtrl("No default input device found!"); | 224 | AppendLineToMultilineEditCtrl("No default input device found!"); |
| 225 | my_btn_.EnableWindow(FALSE); | 225 | my_btn_.EnableWindow(FALSE); |
| 226 | + return; | ||
| 226 | } | 227 | } |
| 227 | AppendLineToMultilineEditCtrl(std::string("Selected device ") + | 228 | AppendLineToMultilineEditCtrl(std::string("Selected device ") + |
| 228 | Pa_GetDeviceInfo(default_device)->name); | 229 | Pa_GetDeviceInfo(default_device)->name); |
| @@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() { | @@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() { | ||
| 309 | msg += "\r\n"; | 310 | msg += "\r\n"; |
| 310 | msg += "That's it!\r\n"; | 311 | msg += "That's it!\r\n"; |
| 311 | 312 | ||
| 312 | - | ||
| 313 | AppendLineToMultilineEditCtrl(msg); | 313 | AppendLineToMultilineEditCtrl(msg); |
| 314 | return; | 314 | return; |
| 315 | } | 315 | } |
| @@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl( | @@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl( | ||
| 398 | // put the selection at the end of text | 398 | // put the selection at the end of text |
| 399 | my_text_.SetSel(nLength, nLength); | 399 | my_text_.SetSel(nLength, nLength); |
| 400 | // replace the selection | 400 | // replace the selection |
| 401 | - CString str; | ||
| 402 | - str.Format(_T("%s"), s.c_str()); | ||
| 403 | 401 | ||
| 404 | std::wstring wstr = Utf8ToUtf16(s); | 402 | std::wstring wstr = Utf8ToUtf16(s); |
| 405 | 403 |
| 1 | | 1 | |
| 2 | Microsoft Visual Studio Solution File, Format Version 12.00 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 |
| 3 | -# Visual Studio Version 16 | ||
| 4 | -VisualStudioVersion = 16.0.32630.194 | 3 | +# Visual Studio Version 17 |
| 4 | +VisualStudioVersion = 17.6.33829.357 | ||
| 5 | MinimumVisualStudioVersion = 10.0.40219.1 | 5 | MinimumVisualStudioVersion = 10.0.40219.1 |
| 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}" | 6 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}" |
| 7 | EndProject | 7 | EndProject |
| 8 | +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}" | ||
| 9 | +EndProject | ||
| 8 | Global | 10 | Global |
| 9 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 11 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| 10 | Debug|x64 = Debug|x64 | 12 | Debug|x64 = Debug|x64 |
| @@ -21,6 +23,14 @@ Global | @@ -21,6 +23,14 @@ Global | ||
| 21 | {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64 | 23 | {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64 |
| 22 | {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32 | 24 | {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32 |
| 23 | {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32 | 25 | {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32 |
| 26 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.ActiveCfg = Debug|x64 | ||
| 27 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.Build.0 = Debug|x64 | ||
| 28 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.ActiveCfg = Debug|Win32 | ||
| 29 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.Build.0 = Debug|Win32 | ||
| 30 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.ActiveCfg = Release|x64 | ||
| 31 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64 | ||
| 32 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32 | ||
| 33 | + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32 | ||
| 24 | EndGlobalSection | 34 | EndGlobalSection |
| 25 | GlobalSection(SolutionProperties) = preSolution | 35 | GlobalSection(SolutionProperties) = preSolution |
| 26 | HideSolutionNode = FALSE | 36 | HideSolutionNode = FALSE |
| @@ -27,36 +27,38 @@ struct SherpaOnnxDisplay { | @@ -27,36 +27,38 @@ struct SherpaOnnxDisplay { | ||
| 27 | std::unique_ptr<sherpa_onnx::Display> impl; | 27 | std::unique_ptr<sherpa_onnx::Display> impl; |
| 28 | }; | 28 | }; |
| 29 | 29 | ||
// Yields `x` if it is truthy (non-zero / non-null), otherwise the fallback
// `y`. Both arguments are parenthesized so that arbitrary expressions can be
// passed safely. Note that `x` is evaluated twice when it is truthy, so it
// must not have side effects.
#define SHERPA_ONNX_OR(x, y) ((x) ? (x) : (y))
| 31 | + | ||
| 30 | SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | 32 | SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( |
| 31 | const SherpaOnnxOnlineRecognizerConfig *config) { | 33 | const SherpaOnnxOnlineRecognizerConfig *config) { |
| 32 | sherpa_onnx::OnlineRecognizerConfig recognizer_config; | 34 | sherpa_onnx::OnlineRecognizerConfig recognizer_config; |
| 33 | 35 | ||
| 34 | - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; | ||
| 35 | - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; | 36 | + recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000); |
| 37 | + recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); | ||
| 36 | 38 | ||
| 37 | recognizer_config.model_config.encoder_filename = | 39 | recognizer_config.model_config.encoder_filename = |
| 38 | - config->model_config.encoder; | 40 | + SHERPA_ONNX_OR(config->model_config.encoder, ""); |
| 39 | recognizer_config.model_config.decoder_filename = | 41 | recognizer_config.model_config.decoder_filename = |
| 40 | - config->model_config.decoder; | ||
| 41 | - recognizer_config.model_config.joiner_filename = config->model_config.joiner; | ||
| 42 | - recognizer_config.model_config.tokens = config->model_config.tokens; | ||
| 43 | - recognizer_config.model_config.num_threads = config->model_config.num_threads; | ||
| 44 | - recognizer_config.model_config.provider = config->model_config.provider; | ||
| 45 | - recognizer_config.model_config.debug = config->model_config.debug; | 42 | + SHERPA_ONNX_OR(config->model_config.decoder, ""); |
| 43 | + recognizer_config.model_config.joiner_filename = SHERPA_ONNX_OR(config->model_config.joiner, ""); | ||
| 44 | + recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, ""); | ||
| 45 | + recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1); | ||
| 46 | + recognizer_config.model_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu"); | ||
| 47 | + recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0); | ||
| 46 | 48 | ||
| 47 | - recognizer_config.decoding_method = config->decoding_method; | ||
| 48 | - recognizer_config.max_active_paths = config->max_active_paths; | 49 | + recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search"); |
| 50 | + recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4); | ||
| 49 | 51 | ||
| 50 | - recognizer_config.enable_endpoint = config->enable_endpoint; | 52 | + recognizer_config.enable_endpoint = SHERPA_ONNX_OR(config->enable_endpoint, 0); |
| 51 | 53 | ||
| 52 | recognizer_config.endpoint_config.rule1.min_trailing_silence = | 54 | recognizer_config.endpoint_config.rule1.min_trailing_silence = |
| 53 | - config->rule1_min_trailing_silence; | 55 | + SHERPA_ONNX_OR(config->rule1_min_trailing_silence, 2.4); |
| 54 | 56 | ||
| 55 | recognizer_config.endpoint_config.rule2.min_trailing_silence = | 57 | recognizer_config.endpoint_config.rule2.min_trailing_silence = |
| 56 | - config->rule2_min_trailing_silence; | 58 | + SHERPA_ONNX_OR(config->rule2_min_trailing_silence, 1.2); |
| 57 | 59 | ||
| 58 | recognizer_config.endpoint_config.rule3.min_utterance_length = | 60 | recognizer_config.endpoint_config.rule3.min_utterance_length = |
| 59 | - config->rule3_min_utterance_length; | 61 | + SHERPA_ONNX_OR(config->rule3_min_utterance_length, 20); |
| 60 | 62 | ||
| 61 | if (config->model_config.debug) { | 63 | if (config->model_config.debug) { |
| 62 | fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); | 64 | fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); |
| @@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | @@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | ||
| 171 | const SherpaOnnxOfflineRecognizerConfig *config) { | 173 | const SherpaOnnxOfflineRecognizerConfig *config) { |
| 172 | sherpa_onnx::OfflineRecognizerConfig recognizer_config; | 174 | sherpa_onnx::OfflineRecognizerConfig recognizer_config; |
| 173 | 175 | ||
| 174 | - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; | 176 | + recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000); |
| 175 | 177 | ||
| 176 | - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; | 178 | + recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); |
| 177 | 179 | ||
| 178 | recognizer_config.model_config.transducer.encoder_filename = | 180 | recognizer_config.model_config.transducer.encoder_filename = |
| 179 | - config->model_config.transducer.encoder; | 181 | + SHERPA_ONNX_OR(config->model_config.transducer.encoder, ""); |
| 180 | 182 | ||
| 181 | recognizer_config.model_config.transducer.decoder_filename = | 183 | recognizer_config.model_config.transducer.decoder_filename = |
| 182 | - config->model_config.transducer.decoder; | 184 | + SHERPA_ONNX_OR(config->model_config.transducer.decoder, ""); |
| 183 | 185 | ||
| 184 | recognizer_config.model_config.transducer.joiner_filename = | 186 | recognizer_config.model_config.transducer.joiner_filename = |
| 185 | - config->model_config.transducer.joiner; | 187 | + SHERPA_ONNX_OR(config->model_config.transducer.joiner,""); |
| 186 | 188 | ||
| 187 | recognizer_config.model_config.paraformer.model = | 189 | recognizer_config.model_config.paraformer.model = |
| 188 | - config->model_config.paraformer.model; | 190 | + SHERPA_ONNX_OR(config->model_config.paraformer.model, ""); |
| 189 | 191 | ||
| 190 | recognizer_config.model_config.nemo_ctc.model = | 192 | recognizer_config.model_config.nemo_ctc.model = |
| 191 | - config->model_config.nemo_ctc.model; | 193 | + SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, ""); |
| 192 | 194 | ||
| 193 | - recognizer_config.model_config.tokens = config->model_config.tokens; | ||
| 194 | - recognizer_config.model_config.num_threads = config->model_config.num_threads; | ||
| 195 | - recognizer_config.model_config.debug = config->model_config.debug; | 195 | + recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, ""); |
| 196 | + recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1); | ||
| 197 | + recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0); | ||
| 196 | 198 | ||
| 197 | - recognizer_config.lm_config.model = config->lm_config.model; | ||
| 198 | - recognizer_config.lm_config.scale = config->lm_config.scale; | 199 | + recognizer_config.lm_config.model = SHERPA_ONNX_OR(config->lm_config.model, ""); |
| 200 | + recognizer_config.lm_config.scale = SHERPA_ONNX_OR(config->lm_config.scale, 1.0); | ||
| 199 | 201 | ||
| 200 | - recognizer_config.decoding_method = config->decoding_method; | ||
| 201 | - recognizer_config.max_active_paths = config->max_active_paths; | 202 | + recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search"); |
| 203 | + recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4); | ||
| 202 | 204 | ||
| 203 | if (config->model_config.debug) { | 205 | if (config->model_config.debug) { |
| 204 | fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); | 206 | fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); |
-
请 注册 或 登录 后发表评论