Add non-streaming speech recognition examples for MFC (#212)

Fangjun Kuang · GitHub
Commit 0abd7ce88119230f7ca86ec48b614a61c4255224 0abd7ce8 1 parent bebc1f13
.github/workflows/mfc.yaml
cmake/onnxruntime.cmake
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.cpp
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.h
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.rc
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj.filters
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.h
mfc-examples/NonStreamingSpeechRecognition/Resource.h
mfc-examples/NonStreamingSpeechRecognition/framework.h
mfc-examples/NonStreamingSpeechRecognition/pch.cpp
mfc-examples/NonStreamingSpeechRecognition/pch.h
mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.ico
mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.rc2
mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props
mfc-examples/NonStreamingSpeechRecognition/targetver.h
mfc-examples/README.md
mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.cpp
mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognitionDlg.cpp
--- a/.github/workflows/mfc.yaml
查看文件 @0abd7ce
+++ b/.github/workflows/mfc.yaml
查看文件 @0abd7ce
@@ -98,6 +98,7 @@ jobs:
 
           cd mfc-examples/$arch/Release
           cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
+           cp NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe
           ls -lh
 
       - name: Upload artifact
@@ -106,10 +107,24 @@ jobs:
           name: streaming-speech-recognition-${{ matrix.arch }}
           path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe
 
-       - name: Release pre-compiled binaries and libs for macOS
+       - name: Upload artifact
+         uses: actions/upload-artifact@v2
+         with:
+           name: non-streaming-speech-recognition-${{ matrix.arch }}
+           path: ./mfc-examples/${{ matrix.arch }}/Release/NonStreamingSpeechRecognition.exe
+ 
+       - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
+         if: env.RELEASE == 'true'
+         uses: svenstaro/upload-release-action@v2
+         with:
+           file_glob: true
+           overwrite: true
+           file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-*.exe
+ 
+       - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
         if: env.RELEASE == 'true'
         uses: svenstaro/upload-release-action@v2
         with:
           file_glob: true
           overwrite: true
-           file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx*.exe
+           file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-*.exe
--- a/cmake/onnxruntime.cmake
查看文件 @0abd7ce
+++ b/cmake/onnxruntime.cmake
查看文件 @0abd7ce
@@ -113,7 +113,7 @@ function(download_onnxruntime)
 
         set(onnxruntime_URL  "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2")
         set(onnxruntime_URL2 "")
-         set(onnxruntime_HASH "SHA256=a2b33a3e8a1f89cddf303f0a97a5a88f4202579c653cfb29158c8cf7da3734eb")
+         set(onnxruntime_HASH "SHA256=94d9a30976b5c4a5dff7508d00f141835916e5a36315d5f53be9b3edb85148b5")
       endif()
 
       if(SHERPA_ONNX_ENABLE_GPU)
@@ -161,7 +161,7 @@ function(download_onnxruntime)
 
         set(onnxruntime_URL  "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2")
         set(onnxruntime_URL2 "")
-         set(onnxruntime_HASH "SHA256=f5c19ac1fc6a61c78a231a41df10aede2586665ab397bdc3f007eb8d2c8d4a19")
+         set(onnxruntime_HASH "SHA256=c809a8510a89b8b37ae7d563c39229db22bac8fbefcbfe5c81a60b367d065b1b")
       endif()
     endif()
     # After downloading, it contains:
--- a/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.cpp 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.cpp 0 → 100644
查看文件 @0abd7ce
+ 
+ // NonStreamingSpeechRecognition.cpp : Defines the class behaviors for the
+ // application.
+ //
+ 
+ // clang-format off
+ #include "pch.h"
+ #include "framework.h"
+ #include "NonStreamingSpeechRecognitionDlg.h"
+ #include "NonStreamingSpeechRecognition.h"
+ // clang-format on
+ 
+ #ifdef _DEBUG
+ #define new DEBUG_NEW
+ #endif
+ 
+ // CNonStreamingSpeechRecognitionApp message map (extends the CWinApp map)
+ 
+ BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionApp, CWinApp)
+ ON_COMMAND(ID_HELP, &CWinApp::OnHelp)  // ID_HELP is handled by the stock CWinApp::OnHelp
+ END_MESSAGE_MAP()
+ 
+ // CNonStreamingSpeechRecognitionApp construction
+ 
+ CNonStreamingSpeechRecognitionApp::CNonStreamingSpeechRecognitionApp() {
+   // Intentionally empty: add construction code here only if it is trivial.
+   // All significant initialization belongs in InitInstance().
+ }
+ 
+ // The one and only CNonStreamingSpeechRecognitionApp object
+ 
+ CNonStreamingSpeechRecognitionApp theApp;
+ 
+ // CNonStreamingSpeechRecognitionApp initialization
+ 
+ // Runs the whole application as a single modal dialog.
+ //
+ // Always returns FALSE: once the dialog has been dismissed the application
+ // should exit instead of entering the framework's message pump.
+ BOOL CNonStreamingSpeechRecognitionApp::InitInstance() {
+   CWinApp::InitInstance();
+ 
+   // A shell manager is required in case the dialog hosts shell tree view or
+   // shell list view controls.
+   CShellManager *shell_manager = new CShellManager;
+ 
+   // The "Windows Native" visual manager enables themes in MFC controls.
+   CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));
+ 
+   // Registry key under which the application settings are stored.
+   // TODO: replace it with something appropriate, e.g., the name of your
+   // company or organization.
+   SetRegistryKey(_T("Local AppWizard-Generated Applications"));
+ 
+   CNonStreamingSpeechRecognitionDlg main_dialog;
+   m_pMainWnd = &main_dialog;
+ 
+   // IDOK and IDCANCEL need no special handling; only a creation failure
+   // (-1) is reported.
+   const INT_PTR dialog_result = main_dialog.DoModal();
+   if (dialog_result == -1) {
+     TRACE(traceAppMsg, 0,
+           "Warning: dialog creation failed, so application is terminating "
+           "unexpectedly.\n");
+     TRACE(traceAppMsg, 0,
+           "Warning: if you are using MFC controls on the dialog, you cannot "
+           "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
+   }
+ 
+   // Release the shell manager created above (deleting nullptr is a no-op).
+   delete shell_manager;
+ 
+ #if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
+   ControlBarCleanUp();
+ #endif
+ 
+   return FALSE;
+ }
--- a/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.h 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.h 0 → 100644
查看文件 @0abd7ce
+ 
+ // NonStreamingSpeechRecognition.h : main header file for the PROJECT_NAME
+ // application
+ //
+ 
+ #pragma once
+ 
+ #ifndef __AFXWIN_H__
+ #error "include 'pch.h' before including this file for PCH"
+ #endif
+ 
+ #include "resource.h"  // main symbols
+ 
+ // CNonStreamingSpeechRecognitionApp:
+ // See NonStreamingSpeechRecognition.cpp for the implementation of this class
+ //
+ 
+ class CNonStreamingSpeechRecognitionApp : public CWinApp {
+  public:
+   CNonStreamingSpeechRecognitionApp();
+ 
+   // Overrides
+   // Shows the main dialog modally; see NonStreamingSpeechRecognition.cpp.
+   // `override` lets the compiler verify the signature against CWinApp.
+   BOOL InitInstance() override;
+ 
+   // Implementation
+ 
+   DECLARE_MESSAGE_MAP()
+ };
+ 
+ extern CNonStreamingSpeechRecognitionApp theApp;
--- a/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.rc 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.rc 0 → 100644
查看文件 @0abd7ce
 // Microsoft Visual C++ generated resource script.
--- a/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj 0 → 100644
查看文件 @0abd7ce
+ <?xml version="1.0" encoding="utf-8"?>
+ <Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+   <ItemGroup Label="ProjectConfigurations">
+     <ProjectConfiguration Include="Debug|Win32">
+       <Configuration>Debug</Configuration>
+       <Platform>Win32</Platform>
+     </ProjectConfiguration>
+     <ProjectConfiguration Include="Release|Win32">
+       <Configuration>Release</Configuration>
+       <Platform>Win32</Platform>
+     </ProjectConfiguration>
+     <ProjectConfiguration Include="Debug|x64">
+       <Configuration>Debug</Configuration>
+       <Platform>x64</Platform>
+     </ProjectConfiguration>
+     <ProjectConfiguration Include="Release|x64">
+       <Configuration>Release</Configuration>
+       <Platform>x64</Platform>
+     </ProjectConfiguration>
+   </ItemGroup>
+   <PropertyGroup Label="Globals">
+     <VCProjectVersion>17.0</VCProjectVersion>
+     <ProjectGuid>{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}</ProjectGuid>
+     <Keyword>MFCProj</Keyword>
+     <RootNamespace>NonStreamingSpeechRecognition</RootNamespace>
+     <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
+   </PropertyGroup>
+   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+     <ConfigurationType>Application</ConfigurationType>
+     <UseDebugLibraries>true</UseDebugLibraries>
+     <PlatformToolset>v143</PlatformToolset>
+     <CharacterSet>Unicode</CharacterSet>
+     <UseOfMfc>Static</UseOfMfc>
+   </PropertyGroup>
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+     <ConfigurationType>Application</ConfigurationType>
+     <UseDebugLibraries>false</UseDebugLibraries>
+     <PlatformToolset>v143</PlatformToolset>
+     <WholeProgramOptimization>true</WholeProgramOptimization>
+     <CharacterSet>Unicode</CharacterSet>
+     <UseOfMfc>Static</UseOfMfc>
+   </PropertyGroup>
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+     <ConfigurationType>Application</ConfigurationType>
+     <UseDebugLibraries>true</UseDebugLibraries>
+     <PlatformToolset>v143</PlatformToolset>
+     <CharacterSet>Unicode</CharacterSet>
+     <UseOfMfc>Static</UseOfMfc>
+   </PropertyGroup>
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+     <ConfigurationType>Application</ConfigurationType>
+     <UseDebugLibraries>false</UseDebugLibraries>
+     <PlatformToolset>v143</PlatformToolset>
+     <WholeProgramOptimization>true</WholeProgramOptimization>
+     <CharacterSet>Unicode</CharacterSet>
+     <UseOfMfc>Static</UseOfMfc>
+   </PropertyGroup>
+   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+   <ImportGroup Label="ExtensionSettings">
+   </ImportGroup>
+   <ImportGroup Label="Shared">
+   </ImportGroup>
+   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+     <Import Project="sherpa-onnx-deps.props" />
+   </ImportGroup>
+   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+     <Import Project="sherpa-onnx-deps.props" />
+   </ImportGroup>
+   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+     <Import Project="sherpa-onnx-deps.props" />
+   </ImportGroup>
+   <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+     <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+     <Import Project="sherpa-onnx-deps.props" />
+   </ImportGroup>
+   <PropertyGroup Label="UserMacros" />
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+     <LinkIncremental>false</LinkIncremental>
+   </PropertyGroup>
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+     <LinkIncremental>true</LinkIncremental>
+   </PropertyGroup>
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+     <LinkIncremental>true</LinkIncremental>
+   </PropertyGroup>
+   <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+     <LinkIncremental>false</LinkIncremental>
+   </PropertyGroup>
+   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+     <ClCompile>
+       <PrecompiledHeader>Use</PrecompiledHeader>
+       <WarningLevel>Level3</WarningLevel>
+       <FunctionLevelLinking>true</FunctionLevelLinking>
+       <IntrinsicFunctions>true</IntrinsicFunctions>
+       <SDLCheck>true</SDLCheck>
+       <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+     </ClCompile>
+     <Link>
+       <SubSystem>Windows</SubSystem>
+       <EnableCOMDATFolding>true</EnableCOMDATFolding>
+       <OptimizeReferences>true</OptimizeReferences>
+     </Link>
+     <Midl>
+       <MkTypLibCompatible>false</MkTypLibCompatible>
+       <ValidateAllParameters>true</ValidateAllParameters>
+       <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+     </Midl>
+     <ResourceCompile>
+       <Culture>0x0409</Culture>
+       <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+     </ResourceCompile>
+   </ItemDefinitionGroup>
+   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+     <ClCompile>
+       <PrecompiledHeader>Use</PrecompiledHeader>
+       <WarningLevel>Level3</WarningLevel>
+       <SDLCheck>true</SDLCheck>
+       <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+     </ClCompile>
+     <Link>
+       <SubSystem>Windows</SubSystem>
+     </Link>
+     <Midl>
+       <MkTypLibCompatible>false</MkTypLibCompatible>
+       <ValidateAllParameters>true</ValidateAllParameters>
+       <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+     </Midl>
+     <ResourceCompile>
+       <Culture>0x0409</Culture>
+       <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+     </ResourceCompile>
+   </ItemDefinitionGroup>
+   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+     <ClCompile>
+       <PrecompiledHeader>Use</PrecompiledHeader>
+       <WarningLevel>Level3</WarningLevel>
+       <SDLCheck>true</SDLCheck>
+       <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+     </ClCompile>
+     <Link>
+       <SubSystem>Windows</SubSystem>
+     </Link>
+     <Midl>
+       <MkTypLibCompatible>false</MkTypLibCompatible>
+       <ValidateAllParameters>true</ValidateAllParameters>
+       <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+     </Midl>
+     <ResourceCompile>
+       <Culture>0x0409</Culture>
+       <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+     </ResourceCompile>
+   </ItemDefinitionGroup>
+   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+     <ClCompile>
+       <PrecompiledHeader>Use</PrecompiledHeader>
+       <WarningLevel>Level3</WarningLevel>
+       <FunctionLevelLinking>true</FunctionLevelLinking>
+       <IntrinsicFunctions>true</IntrinsicFunctions>
+       <SDLCheck>true</SDLCheck>
+       <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
+     </ClCompile>
+     <Link>
+       <SubSystem>Windows</SubSystem>
+       <EnableCOMDATFolding>true</EnableCOMDATFolding>
+       <OptimizeReferences>true</OptimizeReferences>
+     </Link>
+     <Midl>
+       <MkTypLibCompatible>false</MkTypLibCompatible>
+       <ValidateAllParameters>true</ValidateAllParameters>
+       <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+     </Midl>
+     <ResourceCompile>
+       <Culture>0x0409</Culture>
+       <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+       <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+     </ResourceCompile>
+   </ItemDefinitionGroup>
+   <ItemGroup>
+     <ClInclude Include="framework.h" />
+     <ClInclude Include="NonStreamingSpeechRecognition.h" />
+     <ClInclude Include="NonStreamingSpeechRecognitionDlg.h" />
+     <ClInclude Include="pch.h" />
+     <ClInclude Include="Resource.h" />
+     <ClInclude Include="targetver.h" />
+   </ItemGroup>
+   <ItemGroup>
+     <ClCompile Include="NonStreamingSpeechRecognition.cpp" />
+     <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp" />
+     <ClCompile Include="pch.cpp">
+       <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
+       <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
+       <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
+       <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
+     </ClCompile>
+   </ItemGroup>
+   <ItemGroup>
+     <ResourceCompile Include="NonStreamingSpeechRecognition.rc" />
+   </ItemGroup>
+   <ItemGroup>
+     <None Include="res\NonStreamingSpeechRecognition.rc2" />
+   </ItemGroup>
+   <ItemGroup>
+     <Image Include="res\NonStreamingSpeechRecognition.ico" />
+   </ItemGroup>
+   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+   <ImportGroup Label="ExtensionTargets">
+   </ImportGroup>
+ </Project>
\ No newline at end of file
--- a/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj.filters 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj.filters 0 → 100644
查看文件 @0abd7ce
+ <?xml version="1.0" encoding="utf-8"?>
+ <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+   <ItemGroup>
+     <Filter Include="Source Files">
+       <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+       <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+     </Filter>
+     <Filter Include="Header Files">
+       <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+       <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
+     </Filter>
+     <Filter Include="Resource Files">
+       <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+       <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+     </Filter>
+   </ItemGroup>
+   <ItemGroup>
+     <ClInclude Include="NonStreamingSpeechRecognition.h">
+       <Filter>Header Files</Filter>
+     </ClInclude>
+     <ClInclude Include="NonStreamingSpeechRecognitionDlg.h">
+       <Filter>Header Files</Filter>
+     </ClInclude>
+     <ClInclude Include="framework.h">
+       <Filter>Header Files</Filter>
+     </ClInclude>
+     <ClInclude Include="targetver.h">
+       <Filter>Header Files</Filter>
+     </ClInclude>
+     <ClInclude Include="Resource.h">
+       <Filter>Header Files</Filter>
+     </ClInclude>
+     <ClInclude Include="pch.h">
+       <Filter>Header Files</Filter>
+     </ClInclude>
+   </ItemGroup>
+   <ItemGroup>
+     <ClCompile Include="NonStreamingSpeechRecognition.cpp">
+       <Filter>Source Files</Filter>
+     </ClCompile>
+     <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp">
+       <Filter>Source Files</Filter>
+     </ClCompile>
+     <ClCompile Include="pch.cpp">
+       <Filter>Source Files</Filter>
+     </ClCompile>
+   </ItemGroup>
+   <ItemGroup>
+     <ResourceCompile Include="NonStreamingSpeechRecognition.rc">
+       <Filter>Resource Files</Filter>
+     </ResourceCompile>
+   </ItemGroup>
+   <ItemGroup>
+     <None Include="res\NonStreamingSpeechRecognition.rc2">
+       <Filter>Resource Files</Filter>
+     </None>
+   </ItemGroup>
+   <ItemGroup>
+     <Image Include="res\NonStreamingSpeechRecognition.ico">
+       <Filter>Resource Files</Filter>
+     </Image>
+   </ItemGroup>
+ </Project>
\ No newline at end of file
--- a/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp 0 → 100644
查看文件 @0abd7ce
+ 
+ // NonStreamingSpeechRecognitionDlg.cpp : implementation file
+ //
+ 
+ // clang-format off
+ #include "pch.h"
+ #include "framework.h"
+ #include "afxdialogex.h"
+ #include "NonStreamingSpeechRecognition.h"
+ #include "NonStreamingSpeechRecognitionDlg.h"
+ // clang-format on
+ 
+ #include <fstream>
+ #include <sstream>
+ #include <stdexcept>
+ #include <string>
+ #include <vector>
+ 
+ #ifdef _DEBUG
+ #define new DEBUG_NEW
+ #endif
+ 
+ Microphone::Microphone() {
+   PaError err = Pa_Initialize();
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+     exit(-2);
+   }
+ }
+ 
+ // Shuts the PortAudio library down.
+ //
+ // A failure is only reported on stderr. The destructor deliberately does not
+ // call exit(): terminating the process from inside a destructor skips the
+ // cleanup of objects that are still alive, and it is not safe if this object
+ // is destroyed while the process is already exiting.
+ Microphone::~Microphone() {
+   const PaError err = Pa_Terminate();
+   if (err != paNoError) {
+     fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
+   }
+ }
+ 
+ // Converts a UTF-8 encoded string to UTF-16.
+ //
+ // Code points above U+FFFF are written as a surrogate pair. The function
+ // throws std::logic_error("not a UTF-8 string") if the input is malformed:
+ // an invalid lead byte, a missing or invalid continuation byte, an overlong
+ // encoding, an encoded surrogate (U+D800..U+DFFF), or a value above U+10FFFF.
+ //
+ // Adapted from
+ // https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
+ static std::wstring Utf8ToUtf16(const std::string &utf8) {
+   std::wstring utf16;
+   utf16.reserve(utf8.size());  // UTF-16 never needs more units than UTF-8 bytes
+ 
+   size_t i = 0;
+   while (i < utf8.size()) {
+     const unsigned char lead = static_cast<unsigned char>(utf8[i++]);
+ 
+     unsigned long uni = 0;        // code point being decoded
+     unsigned long min_value = 0;  // smallest code point valid for this length
+     size_t todo = 0;              // number of continuation bytes expected
+     if (lead <= 0x7F) {
+       uni = lead;
+     } else if (lead <= 0xBF) {
+       // a continuation byte cannot start a sequence
+       throw std::logic_error("not a UTF-8 string");
+     } else if (lead <= 0xDF) {
+       uni = lead & 0x1F;
+       todo = 1;
+       min_value = 0x80;
+     } else if (lead <= 0xEF) {
+       uni = lead & 0x0F;
+       todo = 2;
+       min_value = 0x800;
+     } else if (lead <= 0xF7) {
+       uni = lead & 0x07;
+       todo = 3;
+       min_value = 0x10000;
+     } else {
+       throw std::logic_error("not a UTF-8 string");
+     }
+ 
+     for (size_t j = 0; j < todo; ++j) {
+       if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
+       const unsigned char cont = static_cast<unsigned char>(utf8[i++]);
+       if (cont < 0x80 || cont > 0xBF)
+         throw std::logic_error("not a UTF-8 string");
+       uni = (uni << 6) | (cont & 0x3F);
+     }
+ 
+     // Overlong form: the value would have fit into a shorter sequence.
+     if (uni < min_value) throw std::logic_error("not a UTF-8 string");
+     if (uni >= 0xD800 && uni <= 0xDFFF)
+       throw std::logic_error("not a UTF-8 string");
+     if (uni > 0x10FFFF) throw std::logic_error("not a UTF-8 string");
+ 
+     if (uni <= 0xFFFF) {
+       utf16 += static_cast<wchar_t>(uni);
+     } else {
+       const unsigned long v = uni - 0x10000;
+       utf16 += static_cast<wchar_t>((v >> 10) + 0xD800);
+       utf16 += static_cast<wchar_t>((v & 0x3FF) + 0xDC00);
+     }
+   }
+   return utf16;
+ }
+ 
+ // Joins all results into a single string for display.
+ //
+ // Every entry is rendered as "<index>: <text>" with a zero-based index, and
+ // entries are separated by "\r\n" (no trailing separator). An empty vector
+ // yields an empty string.
+ static std::string Cat(const std::vector<std::string> &results) {
+   std::ostringstream os;
+   const char *sep = "";
+ 
+   // size_t matches the type of results.size(), which avoids comparing a
+   // signed index against an unsigned size.
+   for (size_t i = 0; i != results.size(); ++i) {
+     os << sep << i << ": " << results[i];
+     sep = "\r\n";
+   }
+ 
+   return os.str();
+ }
+ 
+ // CNonStreamingSpeechRecognitionDlg dialog: constructed from the IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG resource
+ 
+ CNonStreamingSpeechRecognitionDlg::CNonStreamingSpeechRecognitionDlg(
+     CWnd *pParent /*=nullptr*/)
+     : CDialogEx(IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG, pParent) {
+   m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);  // used by SetIcon() in OnInitDialog and DrawIcon() in OnPaint
+ }
+ 
+ // Releases the offline recognizer, if one has been created.
+ CNonStreamingSpeechRecognitionDlg::~CNonStreamingSpeechRecognitionDlg() {
+   if (!recognizer_) {
+     return;
+   }
+ 
+   DestroyOfflineRecognizer(recognizer_);
+   recognizer_ = nullptr;
+ }
+ 
+ void CNonStreamingSpeechRecognitionDlg::DoDataExchange(CDataExchange *pDX) {  // binds dialog controls to members
+   CDialogEx::DoDataExchange(pDX);
+   DDX_Control(pDX, IDC_EDIT1, my_text_);  // edit control that shows messages and results
+   DDX_Control(pDX, IDOK, my_btn_);        // button relabelled "Start"/"Stop" in OnBnClickedOk
+ }
+ 
+ BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionDlg, CDialogEx)
+ ON_WM_PAINT()          // WM_PAINT -> OnPaint
+ ON_WM_QUERYDRAGICON()  // WM_QUERYDRAGICON -> OnQueryDragIcon
+ ON_BN_CLICKED(IDOK, &CNonStreamingSpeechRecognitionDlg::OnBnClickedOk)  // click on the IDOK button
+ END_MESSAGE_MAP()
+ 
+ // CNonStreamingSpeechRecognitionDlg message handlers
+ 
+ BOOL CNonStreamingSpeechRecognitionDlg::OnInitDialog() {
+   CDialogEx::OnInitDialog();
+ 
+   // Set the icon for this dialog. The framework does this automatically
+   // only when the application's main window is not a dialog.
+   SetIcon(m_hIcon, TRUE);   // Set big icon
+   SetIcon(m_hIcon, FALSE);  // Set small icon
+ 
+   // Report the default input device in the dialog, or disable the button if there is none.
+   InitMicrophone();
+ 
+   return TRUE;  // return TRUE unless you set the focus to a control
+ }
+ 
+ // Paints the dialog.
+ //
+ // While the dialog is minimized, the application icon is drawn centered in
+ // the client rectangle (dialog-based applications have to do this themselves;
+ // the document/view framework does it automatically). Otherwise painting is
+ // delegated to CDialogEx.
+ void CNonStreamingSpeechRecognitionDlg::OnPaint() {
+   if (!IsIconic()) {
+     CDialogEx::OnPaint();
+     return;
+   }
+ 
+   CPaintDC dc(this);  // device context for painting
+ 
+   SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()),
+               0);
+ 
+   // Compute the top-left corner that centers the icon.
+   const int icon_width = GetSystemMetrics(SM_CXICON);
+   const int icon_height = GetSystemMetrics(SM_CYICON);
+   CRect client;
+   GetClientRect(&client);
+   const int left = (client.Width() - icon_width + 1) / 2;
+   const int top = (client.Height() - icon_height + 1) / 2;
+ 
+   dc.DrawIcon(left, top, m_hIcon);
+ }
+ 
+ // The system calls this function to obtain the cursor to display while the
+ // user drags the minimized window. The dialog icon handle (m_hIcon) is
+ // returned, cast to HCURSOR.
+ HCURSOR CNonStreamingSpeechRecognitionDlg::OnQueryDragIcon() {
+   return static_cast<HCURSOR>(m_hIcon);
+ }
+ 
+ // PortAudio stream callback: appends the captured samples to the dialog's
+ // sample buffer.
+ //
+ // @param input_buffer       Captured samples; read as `frames_per_buffer`
+ //                           floats (the stream is opened as mono paFloat32
+ //                           in OnBnClickedOk).
+ // @param frames_per_buffer  Number of frames in `input_buffer`.
+ // @param user_data          The CNonStreamingSpeechRecognitionDlg passed to
+ //                           Pa_OpenStream().
+ // @return paContinue while dlg->started_ is true, paComplete otherwise.
+ //
+ // NOTE(review): samples_ and started_ are also accessed from the dialog's
+ // click handler; their declarations are not visible here, so confirm that
+ // this sharing is safe.
+ static int32_t RecordCallback(const void *input_buffer,
+                               void * /*output_buffer*/,
+                               unsigned long frames_per_buffer,  // NOLINT
+                               const PaStreamCallbackTimeInfo * /*time_info*/,
+                               PaStreamCallbackFlags /*status_flags*/,
+                               void *user_data) {
+   auto dlg = static_cast<CNonStreamingSpeechRecognitionDlg *>(user_data);
+ 
+   // Never read through a null input buffer; simply skip such a callback.
+   if (input_buffer != nullptr) {
+     auto begin = static_cast<const float *>(input_buffer);
+     auto end = begin + frames_per_buffer;
+     dlg->samples_.insert(dlg->samples_.end(), begin, end);
+   }
+ 
+   return dlg->started_ ? paContinue : paComplete;
+ }
+ 
+ // Handles clicks on the Start/Stop button.
+ //
+ // The first click lazily creates the recognizer. After that the button
+ // toggles between two states:
+ //  - not started: open and start a PortAudio input stream that feeds
+ //    RecordCallback, and relabel the button "Stop";
+ //  - started: close the stream, decode everything that was recorded, show
+ //    all results in the edit control, and relabel the button "Start".
+ //
+ // On a PortAudio error the message is shown and the button is disabled.
+ void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() {
+   if (!recognizer_) {
+     AppendLineToMultilineEditCtrl("Creating recognizer...");
+     AppendLineToMultilineEditCtrl("It will take several seconds. Please wait");
+     InitRecognizer();
+     if (!recognizer_) {
+       // failed to create the recognizer
+       return;
+     }
+     AppendLineToMultilineEditCtrl("Recognizer created!");
+   }
+ 
+   if (!started_) {
+     PaStreamParameters param;
+     param.device = Pa_GetDefaultInputDevice();
+     const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
+     if (!info) {
+       // No usable input device: do not dereference a null device info below.
+       AppendLineToMultilineEditCtrl("No default input device found!");
+       my_btn_.EnableWindow(FALSE);
+       return;
+     }
+     param.channelCount = 1;
+     param.sampleFormat = paFloat32;
+     param.suggestedLatency = info->defaultLowInputLatency;
+     param.hostApiSpecificStreamInfo = nullptr;
+     float sample_rate = config_.feat_config.sample_rate;
+ 
+     samples_.clear();
+     pa_stream_ = nullptr;
+     PaError err =
+         Pa_OpenStream(&pa_stream_, &param, nullptr, /* &outputParameters, */
+                       sample_rate,
+                       0,          // frames per buffer
+                       paClipOff,  // we won't output out of range samples
+                                   // so don't bother clipping them
+                       RecordCallback, this);
+     if (err == paNoError) {
+       // Must be set before the stream starts: RecordCallback returns
+       // paComplete as soon as it sees started_ == false.
+       started_ = true;
+       err = Pa_StartStream(pa_stream_);
+     }
+ 
+     if (err != paNoError) {
+       // Roll back so that the dialog is not left in the "recording" state
+       // and a stream that was opened but not started is not leaked.
+       started_ = false;
+       if (pa_stream_) {
+         Pa_CloseStream(pa_stream_);
+         pa_stream_ = nullptr;
+       }
+       AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
+                                     Pa_GetErrorText(err));
+       my_btn_.EnableWindow(FALSE);
+       return;
+     }
+ 
+     AppendLineToMultilineEditCtrl(
+         "\r\nStarted! Please speak and click stop.\r\n");
+     my_btn_.SetWindowText(_T("Stop"));
+ 
+   } else {
+     started_ = false;
+ 
+     Pa_Sleep(200);  // sleep for 200ms
+     if (pa_stream_) {
+       PaError err = Pa_CloseStream(pa_stream_);
+       if (err != paNoError) {
+         AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
+                                       Pa_GetErrorText(err));
+         my_btn_.EnableWindow(FALSE);
+         return;
+       }
+     }
+     pa_stream_ = nullptr;
+ 
+     SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer_);
+ 
+     AcceptWaveformOffline(stream, config_.feat_config.sample_rate,
+                           samples_.data(), samples_.size());
+     DecodeOfflineStream(recognizer_, stream);
+     SherpaOnnxOfflineRecognizerResult *r = GetOfflineStreamResult(stream);
+     results_.emplace_back(r->text);
+ 
+     // The text has been copied into results_, so release the C objects
+     // before the UTF-8 -> UTF-16 conversion, which throws on malformed input.
+     DestroyOfflineRecognizerResult(r);
+     DestroyOfflineStream(stream);
+ 
+     const std::wstring str = Utf8ToUtf16(Cat(results_));
+     my_text_.SetWindowText(str.c_str());
+     my_text_.SetFocus();
+     my_text_.SetSel(-1);
+ 
+     // AfxMessageBox("Stopped", MB_OK);
+     my_btn_.SetWindowText(_T("Start"));
+     AppendLineToMultilineEditCtrl("\r\nStopped. Please click start and speak");
+   }
+ }
+ 
+ // Reports the default PortAudio input device in the edit control.
+ //
+ // If there is no default input device, or its device info cannot be
+ // obtained, a message is shown and the Start button is disabled.
+ void CNonStreamingSpeechRecognitionDlg::InitMicrophone() {
+   const int default_device = Pa_GetDefaultInputDevice();
+   const PaDeviceInfo *info = (default_device == paNoDevice)
+                                  ? nullptr
+                                  : Pa_GetDeviceInfo(default_device);
+   if (!info) {
+     AppendLineToMultilineEditCtrl("No default input device found!");
+     my_btn_.EnableWindow(FALSE);
+     return;
+   }
+ 
+   AppendLineToMultilineEditCtrl(std::string("Selected device ") + info->name);
+ }
+ 
+ // Returns true when `filename` could be opened for reading, i.e. the input
+ // stream is in a good state right after construction.
+ bool CNonStreamingSpeechRecognitionDlg::Exists(const std::string &filename) {
+   return std::ifstream(filename).good();
+ }
+ 
+ // Disables the start/stop button and prints step-by-step instructions for
+ // downloading and renaming a non-streaming model into the edit control.
+ // Called when the model files expected by the recognizer are missing.
+ void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
+   my_btn_.EnableWindow(FALSE);
+   std::string msg =
+       "\r\nPlease go to\r\n"
+       "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
+       "\r\n";
+   msg += "to download a non-streaming model, i.e., an offline model.\r\n";
+   msg +=
+       "You need to rename them to encoder.onnx, decoder.onnx, and "
+       "joiner.onnx correspondingly.\r\n\r\n";
+   msg += "It supports both transducer models and paraformer models.\r\n\r\n";
+   msg +=
+       "We give two examples below to show you how to download models\r\n\r\n";
+   msg += "(1) Transducer\r\n\r\n";
+   msg +=
+       "We use "
+       "https://huggingface.co/pkufool/"
+       "icefall-asr-zipformer-wenetspeech-20230615 below\r\n";
+   msg +=
+       "wget "
+       "https://huggingface.co/pkufool/"
+       "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
+       "encoder-epoch-12-avg-4.onnx\r\n";
+   msg +=
+       "wget "
+       "https://huggingface.co/pkufool/"
+       "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
+       "decoder-epoch-12-avg-4.onnx\r\n";
+   msg +=
+       "wget "
+       "https://huggingface.co/pkufool/"
+       "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
+       "joiner-epoch-12-avg-4.onnx\r\n";
+   // The transducer path also requires ./tokens.txt, so list it with the
+   // other downloads; otherwise following these steps still fails.
+   msg +=
+       "wget "
+       "https://huggingface.co/pkufool/"
+       "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/data/"
+       "lang_char/tokens.txt\r\n";
+   msg += "\r\n Now rename them\r\n";
+   msg += "mv encoder-epoch-12-avg-4.onnx encoder.onnx\r\n";
+   msg += "mv decoder-epoch-12-avg-4.onnx decoder.onnx\r\n";
+   msg += "mv joiner-epoch-12-avg-4.onnx joiner.onnx\r\n\r\n";
+   msg += "(2) Paraformer\r\n\r\n";
+   msg +=
+       "wget "
+       "https://huggingface.co/csukuangfj/"
+       "sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/model.onnx\r\n";
+   msg +=
+       "wget "
+       "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/"
+       "resolve/main/tokens.txt\r\n\r\n";
+   msg += "\r\n Now rename them\r\n";
+   msg += "mv model.onnx paraformer.onnx\r\n";
+   msg += "\r\n";
+   msg += "That's it!\r\n";
+ 
+   AppendLineToMultilineEditCtrl(msg);
+ }
+ 
+ // Creates the offline recognizer from a paraformer model in the current
+ // directory. ./paraformer.int8.onnx is preferred over ./paraformer.onnx when
+ // it exists; ./tokens.txt is always required. When a file is missing, the
+ // problem is logged, the help text is shown and no recognizer is created.
+ void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
+   std::string paraformer = "./paraformer.onnx";
+   std::string tokens = "./tokens.txt";
+ 
+   bool is_ok = true;
+ 
+   if (Exists("./paraformer.int8.onnx")) {
+     paraformer = "./paraformer.int8.onnx";
+   } else if (!Exists(paraformer)) {
+     AppendLineToMultilineEditCtrl(paraformer + " does not exist!");
+     is_ok = false;
+   }
+ 
+   if (!Exists(tokens)) {
+     AppendLineToMultilineEditCtrl(tokens + " does not exist!");
+     is_ok = false;
+   }
+ 
+   if (!is_ok) {
+     ShowInitRecognizerHelpMessage();
+     return;
+   }
+ 
+   memset(&config_, 0, sizeof(config_));
+ 
+   config_.feat_config.sample_rate = 16000;
+   config_.feat_config.feature_dim = 80;
+ 
+   config_.model_config.paraformer.model = paraformer.c_str();
+   config_.model_config.tokens = tokens.c_str();
+   config_.model_config.num_threads = 1;
+   config_.model_config.debug = 1;
+ 
+   config_.decoding_method = "greedy_search";
+   config_.max_active_paths = 4;
+ 
+   recognizer_ = CreateOfflineRecognizer(&config_);
+ 
+   // config_ is a member that is read again after this function returns
+   // (e.g. its sample rate), but the path strings above are locals. Reset the
+   // borrowed pointers so the member never holds dangling addresses.
+   config_.model_config.paraformer.model = nullptr;
+   config_.model_config.tokens = nullptr;
+ }
+ 
+ // Creates the offline recognizer from model files in the current directory.
+ // If a paraformer model is present, initialization is delegated to
+ // InitParaformer(). Otherwise a transducer model is assumed, which needs
+ // ./encoder.onnx, ./decoder.onnx, ./joiner.onnx and ./tokens.txt. Every
+ // missing file is logged; if any is missing, the help text is shown and no
+ // recognizer is created.
+ void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
+   if (Exists("./paraformer.onnx") || Exists("./paraformer.int8.onnx")) {
+     InitParaformer();
+     return;
+   }
+ 
+   // assume it is transducer
+ 
+   std::string encoder = "./encoder.onnx";
+   std::string decoder = "./decoder.onnx";
+   std::string joiner = "./joiner.onnx";
+   std::string tokens = "./tokens.txt";
+ 
+   // Check every file (no early exit) so that the user sees the complete
+   // list of what is missing.
+   bool is_ok = true;
+   for (const std::string *path : {&encoder, &decoder, &joiner, &tokens}) {
+     if (!Exists(*path)) {
+       AppendLineToMultilineEditCtrl(*path + " does not exist!");
+       is_ok = false;
+     }
+   }
+ 
+   if (!is_ok) {
+     ShowInitRecognizerHelpMessage();
+     return;
+   }
+   memset(&config_, 0, sizeof(config_));
+ 
+   config_.feat_config.sample_rate = 16000;
+   config_.feat_config.feature_dim = 80;
+ 
+   config_.model_config.transducer.encoder = encoder.c_str();
+   config_.model_config.transducer.decoder = decoder.c_str();
+   config_.model_config.transducer.joiner = joiner.c_str();
+   config_.model_config.tokens = tokens.c_str();
+   config_.model_config.num_threads = 1;
+   config_.model_config.debug = 0;
+ 
+   config_.decoding_method = "greedy_search";
+   config_.max_active_paths = 4;
+ 
+   recognizer_ = CreateOfflineRecognizer(&config_);
+ 
+   // config_ is a member that is read again after this function returns
+   // (e.g. its sample rate), but the path strings above are locals. Reset the
+   // borrowed pointers so the member never holds dangling addresses.
+   config_.model_config.transducer.encoder = nullptr;
+   config_.model_config.transducer.decoder = nullptr;
+   config_.model_config.transducer.joiner = nullptr;
+   config_.model_config.tokens = nullptr;
+ }
+ 
+ // Appends the UTF-8 text `s` to the end of the edit control's content.
+ void CNonStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
+     const std::string &s) {
+   // An empty selection placed at the current text length acts as an
+   // insertion point after the last character.
+   const int end_pos = my_text_.GetWindowTextLength();
+   my_text_.SetSel(end_pos, end_pos);
+ 
+   // Replacing that empty selection inserts the text; it is converted to
+   // UTF-16 first.
+   my_text_.ReplaceSel(Utf8ToUtf16(s).c_str());
+ }
+ 
+ // Appends `s` to the edit control, preceded by a CR/LF line break.
+ void CNonStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl(
+     const std::string &s) {
+   std::string line = "\r\n";
+   line += s;
+   AppendTextToEditCtrl(line);
+ }
--- a/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.h 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.h 0 → 100644
查看文件 @0abd7ce
+ 
+ // NonStreamingSpeechRecognitionDlg.h : header file
+ //
+ 
+ #pragma once
+ 
+ #include <string>
+ #include <vector>
+ 
+ #include "portaudio.h"
+ #include "sherpa-onnx/c-api/c-api.h"
+ 
+ // Helper whose constructor and destructor are defined in the .cpp file.
+ // NOTE(review): presumably they set up and tear down the audio backend for
+ // the lifetime of the object -- confirm against the implementation.
+ class Microphone {
+  public:
+   Microphone();
+   ~Microphone();
+ 
+   // A type with a user-provided destructor should not be copied implicitly:
+   // a copy would run the destructor a second time for a single constructor
+   // call.
+   Microphone(const Microphone &) = delete;
+   Microphone &operator=(const Microphone &) = delete;
+ };
+ 
+ // CNonStreamingSpeechRecognitionDlg dialog
+ // Main dialog of the non-streaming speech recognition example. It owns the
+ // offline recognizer, the PortAudio stream handle and the two controls used
+ // by the UI (one button and one multi-line edit box).
+ class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
+   // Construction
+  public:
+   CNonStreamingSpeechRecognitionDlg(
+       CWnd *pParent = nullptr);  // standard constructor
+   ~CNonStreamingSpeechRecognitionDlg();
+ 
+ // Dialog Data
+ #ifdef AFX_DESIGN_TIME
+   enum { IDD = IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG };
+ #endif
+ 
+  protected:
+   virtual void DoDataExchange(CDataExchange *pDX);  // DDX/DDV support
+ 
+   // Implementation
+  protected:
+   HICON m_hIcon;
+ 
+   // Generated message map functions
+   virtual BOOL OnInitDialog();
+   afx_msg void OnPaint();
+   afx_msg HCURSOR OnQueryDragIcon();
+   DECLARE_MESSAGE_MAP()
+  public:
+   // Handler for the start/stop button.
+   afx_msg void OnBnClickedOk();
+   int RunThread();
+ 
+  private:
+   Microphone mic_;
+ 
+   // Recognizer handle; stays nullptr until InitRecognizer()/InitParaformer()
+   // succeeds.
+   SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;
+   // Value-initialized so that no field is indeterminate before the
+   // recognizer has been configured.
+   SherpaOnnxOfflineRecognizerConfig config_{};
+ 
+   // PortAudio stream handle; nullptr while no stream is open.
+   PaStream *pa_stream_ = nullptr;
+   CButton my_btn_;  // start/stop button
+   CEdit my_text_;   // multi-line edit box used for log output and results
+   // Recognized text of each finished recording, in order.
+   std::vector<std::string> results_;
+ 
+  public:
+   // NOTE(review): these two are public, presumably so that the audio
+   // callback outside this class can access them -- confirm against the .cpp.
+   bool started_ = false;
+   std::vector<float> samples_;
+ 
+  private:
+   void AppendTextToEditCtrl(const std::string &s);
+   void AppendLineToMultilineEditCtrl(const std::string &s);
+   void InitMicrophone();
+ 
+   bool Exists(const std::string &filename);
+   void InitRecognizer();
+ 
+   void InitParaformer();
+   void ShowInitRecognizerHelpMessage();
+ };
--- a/mfc-examples/NonStreamingSpeechRecognition/Resource.h 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/Resource.h 0 → 100644
查看文件 @0abd7ce
+ //{{NO_DEPENDENCIES}}
+ // Microsoft Visual C++ generated include file.
+ // Used by NonStreamingSpeechRecognition.rc
+ //
+ #define IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG 102
+ #define IDR_MAINFRAME 128
+ #define IDC_EDIT1 1000
+ 
+ // Next default values for new objects
+ //
+ #ifdef APSTUDIO_INVOKED
+ #ifndef APSTUDIO_READONLY_SYMBOLS
+ #define _APS_NEXT_RESOURCE_VALUE 130
+ #define _APS_NEXT_COMMAND_VALUE 32771
+ #define _APS_NEXT_CONTROL_VALUE 1001
+ #define _APS_NEXT_SYMED_VALUE 101
+ #endif
+ #endif
--- a/mfc-examples/NonStreamingSpeechRecognition/framework.h 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/framework.h 0 → 100644
查看文件 @0abd7ce
+ #pragma once
+ 
+ #ifndef VC_EXTRALEAN
+ #define VC_EXTRALEAN  // Exclude rarely-used stuff from Windows headers
+ #endif
+ 
+ #include "targetver.h"
+ 
+ #define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS  // some CString constructors will be
+                                             // explicit
+ 
+ // turns off MFC's hiding of some common and often safely ignored warning
+ // messages
+ #define _AFX_ALL_WARNINGS
+ 
+ #include <afxext.h>  // MFC extensions
+ #include <afxwin.h>  // MFC core and standard components
+ 
+ #ifndef _AFX_NO_OLE_SUPPORT
+ #include <afxdtctl.h>  // MFC support for Internet Explorer 4 Common Controls
+ #endif
+ #ifndef _AFX_NO_AFXCMN_SUPPORT
+ #include <afxcmn.h>  // MFC support for Windows Common Controls
+ #endif               // _AFX_NO_AFXCMN_SUPPORT
+ 
+ #include <afxcontrolbars.h>  // MFC support for ribbons and control bars
--- a/mfc-examples/NonStreamingSpeechRecognition/pch.cpp 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/pch.cpp 0 → 100644
查看文件 @0abd7ce
+ // pch.cpp: source file corresponding to the pre-compiled header
+ 
+ #include "pch.h"
+ 
+ // When you are using pre-compiled headers, this source file is necessary for
+ // compilation to succeed.
--- a/mfc-examples/NonStreamingSpeechRecognition/pch.h 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/pch.h 0 → 100644
查看文件 @0abd7ce
+ // pch.h: This is a precompiled header file.
+ // Files listed below are compiled only once, improving build performance for
+ // future builds. This also affects IntelliSense performance, including code
+ // completion and many code browsing features. However, files listed here are
+ // ALL re-compiled if any one of them is updated between builds. Do not add
+ // files here that you will be updating frequently as this negates the
+ // performance advantage.
+ 
+ #ifndef PCH_H
+ #define PCH_H
+ 
+ // add headers that you want to pre-compile here
+ #include "framework.h"
+ 
+ #endif  // PCH_H
--- a/mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.ico 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.ico 0 → 100644
查看文件 @0abd7ce
--- a/mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.rc2 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.rc2 0 → 100644
查看文件 @0abd7ce
 B//
--- a/mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props 0 → 100644
查看文件 @0abd7ce
+ <?xml version="1.0" encoding="utf-8"?>
+ <Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+   <ImportGroup Label="PropertySheets" />
+   <PropertyGroup Label="UserMacros" />
+   <PropertyGroup>
+     <SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
+     <SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
+     <SherpaOnnxLibraries>
+         sherpa-onnx-portaudio_static.lib;
+         sherpa-onnx-c-api.lib;
+         sherpa-onnx-core.lib;
+         kaldi-native-fbank-core.lib;
+         absl_base.lib;
+         absl_city.lib;
+         absl_hash.lib;
+         absl_low_level_hash.lib;
+         absl_raw_hash_set.lib;
+         absl_raw_logging_internal.lib;
+         absl_throw_delegate.lib;
+         clog.lib;
+         cpuinfo.lib;
+         flatbuffers.lib;
+         libprotobuf-lite.lib;
+         onnx.lib;
+         onnx_proto.lib;
+         onnxruntime_common.lib;
+         onnxruntime_flatbuffers.lib;
+         onnxruntime_framework.lib;
+         onnxruntime_graph.lib;
+         onnxruntime_mlas.lib;
+         onnxruntime_optimizer.lib;
+         onnxruntime_providers.lib;
+         onnxruntime_session.lib;
+         onnxruntime_util.lib;
+         re2.lib;
+     </SherpaOnnxLibraries>
+   </PropertyGroup>
+   <ItemDefinitionGroup>
+     <ClCompile>
+       <AdditionalIncludeDirectories>
+ 	  $(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
+     $(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+     </ClCompile>
+     <Link>
+       <AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
+       <AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
+     </Link>
+   </ItemDefinitionGroup>
+   <ItemGroup />
+ </Project>
--- a/mfc-examples/NonStreamingSpeechRecognition/targetver.h 0 → 100644
查看文件 @0abd7ce
+++ b/mfc-examples/NonStreamingSpeechRecognition/targetver.h 0 → 100644
查看文件 @0abd7ce
+ #pragma once
+ 
+ // Including SDKDDKVer.h defines the highest available Windows platform.
+ 
+ // If you wish to build your application for a previous Windows platform,
+ // include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish
+ // to support before including SDKDDKVer.h.
+ 
+ #include <SDKDDKVer.h>
--- a/mfc-examples/README.md
查看文件 @0abd7ce
+++ b/mfc-examples/README.md
查看文件 @0abd7ce
@@ -3,11 +3,19 @@
 This directory contains examples showing how to use Next-gen Kaldi in MFC
 for speech recognition.
 
- Caution: You need to use Windows and install Visual Studio in order to run it.
+ Caution: You need to use Windows and install Visual Studio 2022 in order to
+ compile it.
+ 
+ Hint: If you don't want to install Visual Studio, see below for how to
+ download a pre-compiled `exe`.
+ 
 We use bash script below to demonstrate how to use it. Please change
 the commands accordingly for Windows.
 
- ## Streaming speech recognition
+ ## How to compile
+ 
+ 
+ First, we need to compile sherpa-onnx:
 
 ```bash
 mkdir -p $HOME/open-source
@@ -19,7 +27,6 @@ mkdir build
 
 cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install ..
 cmake --build . --config Release --target install
- 
 cd ../mfc-examples
 
 msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64
@@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6
 # now run the program
 
 ./x64/Release/StreamingSpeechRecognition.exe
+ ./x64/Release/NonStreamingSpeechRecognition.exe
 ```
 
- Note that we also need to download pre-trained models. Please
- refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
- for a list of streaming models.
+ If you don't want to compile the project by yourself, you can download
+ pre-compiled `exe` from https://github.com/k2-fsa/sherpa-onnx/releases
 
- We use the following model for demonstration.
+ For instance, you can use the following addresses:
 
- ```bash
- cd $HOME/open-source/sherpa-onnx/mfc-examples/x64/Release
- wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
- wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx
- wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
- wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
- 
- # now rename
- mv encoder-epoch-12-avg-4-chunk-16-left-128.onnx encoder.onnx
- mv decoder-epoch-12-avg-4-chunk-16-left-128.onnx decoder.onnx
- mv joiner-epoch-12-avg-4-chunk-16-left-128.onnx joiner.onnx
- 
- # Now run it!
- ./StreamingSpeechRecognition.exe
- ```
+   - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-streaming-v1.5.1.exe
+   - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-non-streaming-v1.5.1.exe
--- a/mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.cpp
查看文件 @0abd7ce
+++ b/mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.cpp
查看文件 @0abd7ce
@@ -3,12 +3,14 @@
 // application.
 //
 
+ // clang-format off
 #include "pch.h"
 #include "framework.h"
+ // clang-format on
 
 #include "StreamingSpeechRecognition.h"
- #include "StreamingSpeechRecognitionDlg.h"
 
+ #include "StreamingSpeechRecognitionDlg.h"
 
 #ifdef _DEBUG
 #define new DEBUG_NEW
--- a/mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognitionDlg.cpp
查看文件 @0abd7ce
+++ b/mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognitionDlg.cpp
查看文件 @0abd7ce
 
 // StreamingSpeechRecognitionDlg.cpp : implementation file
 //
+ // clang-format off
 #include "pch.h"
 #include "framework.h"
 #include "afxdialogex.h"
- 
+ // clang-format on
 
 #include "StreamingSpeechRecognitionDlg.h"
 
@@ -15,7 +16,6 @@
 
 #include "StreamingSpeechRecognition.h"
 
- 
 #ifdef _DEBUG
 #define new DEBUG_NEW
 #endif
@@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() {
     // exit(-1);
     AppendLineToMultilineEditCtrl("No default input device found!");
     my_btn_.EnableWindow(FALSE);
+     return;
   }
   AppendLineToMultilineEditCtrl(std::string("Selected device ") +
                                 Pa_GetDeviceInfo(default_device)->name);
@@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() {
     msg += "\r\n";
     msg += "That's it!\r\n";
 
- 
     AppendLineToMultilineEditCtrl(msg);
     return;
   }
@@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
   // put the selection at the end of text
   my_text_.SetSel(nLength, nLength);
   // replace the selection
-   CString str;
-   str.Format(_T("%s"), s.c_str());
 
   std::wstring wstr = Utf8ToUtf16(s);
 
--- a/mfc-examples/mfc-examples.sln
查看文件 @0abd7ce
+++ b/mfc-examples/mfc-examples.sln
查看文件 @0abd7ce
 
 Microsoft Visual Studio Solution File, Format Version 12.00
- # Visual Studio Version 16
- VisualStudioVersion = 16.0.32630.194
+ # Visual Studio Version 17
+ VisualStudioVersion = 17.6.33829.357
 MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}"
 EndProject
+ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
+ EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|x64 = Debug|x64
@@ -21,6 +23,14 @@ Global
 		{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64
 		{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32
 		{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.ActiveCfg = Debug|x64
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.Build.0 = Debug|x64
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.ActiveCfg = Debug|Win32
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.Build.0 = Debug|Win32
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.ActiveCfg = Release|x64
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
+ 		{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
 	EndGlobalSection
 	GlobalSection(SolutionProperties) = preSolution
 		HideSolutionNode = FALSE
--- a/sherpa-onnx/c-api/c-api.cc
查看文件 @0abd7ce
+++ b/sherpa-onnx/c-api/c-api.cc
查看文件 @0abd7ce
@@ -27,36 +27,38 @@ struct SherpaOnnxDisplay {
   std::unique_ptr<sherpa_onnx::Display> impl;
 };
 
+ // Evaluates to x when x is non-zero/non-null, otherwise to the fallback y.
+ // Arguments and the whole expansion are parenthesized so the macro is safe
+ // inside larger expressions. Note that x is still evaluated twice.
+ #define SHERPA_ONNX_OR(x, y) ((x) ? (x) : (y))
+ 
 SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
     const SherpaOnnxOnlineRecognizerConfig *config) {
   sherpa_onnx::OnlineRecognizerConfig recognizer_config;
 
-   recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
-   recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
+   recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
+   recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
 
   recognizer_config.model_config.encoder_filename =
-       config->model_config.encoder;
+       SHERPA_ONNX_OR(config->model_config.encoder, "");
   recognizer_config.model_config.decoder_filename =
-       config->model_config.decoder;
-   recognizer_config.model_config.joiner_filename = config->model_config.joiner;
-   recognizer_config.model_config.tokens = config->model_config.tokens;
-   recognizer_config.model_config.num_threads = config->model_config.num_threads;
-   recognizer_config.model_config.provider = config->model_config.provider;
-   recognizer_config.model_config.debug = config->model_config.debug;
+       SHERPA_ONNX_OR(config->model_config.decoder, "");
+   recognizer_config.model_config.joiner_filename = SHERPA_ONNX_OR(config->model_config.joiner, "");
+   recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
+   recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
+   recognizer_config.model_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu");
+   recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
 
-   recognizer_config.decoding_method = config->decoding_method;
-   recognizer_config.max_active_paths = config->max_active_paths;
+   recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
+   recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
 
-   recognizer_config.enable_endpoint = config->enable_endpoint;
+   recognizer_config.enable_endpoint = SHERPA_ONNX_OR(config->enable_endpoint, 0);
 
   recognizer_config.endpoint_config.rule1.min_trailing_silence =
-       config->rule1_min_trailing_silence;
+       SHERPA_ONNX_OR(config->rule1_min_trailing_silence, 2.4);
 
   recognizer_config.endpoint_config.rule2.min_trailing_silence =
-       config->rule2_min_trailing_silence;
+       SHERPA_ONNX_OR(config->rule2_min_trailing_silence, 1.2);
 
   recognizer_config.endpoint_config.rule3.min_utterance_length =
-       config->rule3_min_utterance_length;
+       SHERPA_ONNX_OR(config->rule3_min_utterance_length, 20);
 
   if (config->model_config.debug) {
     fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
@@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
     const SherpaOnnxOfflineRecognizerConfig *config) {
   sherpa_onnx::OfflineRecognizerConfig recognizer_config;
 
-   recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;
+   recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
 
-   recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim;
+   recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
 
   recognizer_config.model_config.transducer.encoder_filename =
-       config->model_config.transducer.encoder;
+       SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
 
   recognizer_config.model_config.transducer.decoder_filename =
-       config->model_config.transducer.decoder;
+       SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
 
   recognizer_config.model_config.transducer.joiner_filename =
-       config->model_config.transducer.joiner;
+       SHERPA_ONNX_OR(config->model_config.transducer.joiner,"");
 
   recognizer_config.model_config.paraformer.model =
-       config->model_config.paraformer.model;
+       SHERPA_ONNX_OR(config->model_config.paraformer.model, "");
 
   recognizer_config.model_config.nemo_ctc.model =
-       config->model_config.nemo_ctc.model;
+       SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");
 
-   recognizer_config.model_config.tokens = config->model_config.tokens;
-   recognizer_config.model_config.num_threads = config->model_config.num_threads;
-   recognizer_config.model_config.debug = config->model_config.debug;
+   recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
+   recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
+   recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
 
-   recognizer_config.lm_config.model = config->lm_config.model;
-   recognizer_config.lm_config.scale = config->lm_config.scale;
+   recognizer_config.lm_config.model = SHERPA_ONNX_OR(config->lm_config.model, "");
+   recognizer_config.lm_config.scale = SHERPA_ONNX_OR(config->lm_config.scale, 1.0);
 
-   recognizer_config.decoding_method = config->decoding_method;
-   recognizer_config.max_active_paths = config->max_active_paths;
+   recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
+   recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
 
   if (config->model_config.debug) {
     fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());