Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2023-07-14 17:00:14 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2023-07-14 17:00:14 +0800
Commit
0abd7ce88119230f7ca86ec48b614a61c4255224
0abd7ce8
1 parent
bebc1f13
Add non-streaming speech recognition examples for MFC (#212)
隐藏空白字符变更
内嵌
并排对比
正在显示
22 个修改的文件
包含
1153 行增加
和
63 行删除
.github/workflows/mfc.yaml
cmake/onnxruntime.cmake
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.cpp
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.h
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.rc
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj.filters
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.h
mfc-examples/NonStreamingSpeechRecognition/Resource.h
mfc-examples/NonStreamingSpeechRecognition/framework.h
mfc-examples/NonStreamingSpeechRecognition/pch.cpp
mfc-examples/NonStreamingSpeechRecognition/pch.h
mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.ico
mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.rc2
mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props
mfc-examples/NonStreamingSpeechRecognition/targetver.h
mfc-examples/README.md
mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.cpp
mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognitionDlg.cpp
mfc-examples/mfc-examples.sln
sherpa-onnx/c-api/c-api.cc
.github/workflows/mfc.yaml
查看文件 @
0abd7ce
...
...
@@ -98,6 +98,7 @@ jobs:
cd mfc-examples/$arch/Release
cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
cp NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe
ls -lh
-
name
:
Upload artifact
...
...
@@ -106,10 +107,24 @@ jobs:
name
:
streaming-speech-recognition-${{ matrix.arch }}
path
:
./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe
-
name
:
Release pre-compiled binaries and libs for macOS
-
name
:
Upload artifact
uses
:
actions/upload-artifact@v2
with
:
name
:
non-streaming-speech-recognition-${{ matrix.arch }}
path
:
./mfc-examples/${{ matrix.arch }}/Release/NonStreamingSpeechRecognition.exe
-
name
:
Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
if
:
env.RELEASE == 'true'
uses
:
svenstaro/upload-release-action@v2
with
:
file_glob
:
true
overwrite
:
true
file
:
./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-*.exe
-
name
:
Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
if
:
env.RELEASE == 'true'
uses
:
svenstaro/upload-release-action@v2
with
:
file_glob
:
true
overwrite
:
true
file
:
./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx*.exe
file
:
./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx
-non-streaming-
*.exe
...
...
cmake/onnxruntime.cmake
查看文件 @
0abd7ce
...
...
@@ -113,7 +113,7 @@ function(download_onnxruntime)
set
(
onnxruntime_URL
"https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2"
)
set
(
onnxruntime_URL2
""
)
set
(
onnxruntime_HASH
"SHA256=
a2b33a3e8a1f89cddf303f0a97a5a88f4202579c653cfb29158c8cf7da3734eb
"
)
set
(
onnxruntime_HASH
"SHA256=
94d9a30976b5c4a5dff7508d00f141835916e5a36315d5f53be9b3edb85148b5
"
)
endif
()
if
(
SHERPA_ONNX_ENABLE_GPU
)
...
...
@@ -161,7 +161,7 @@ function(download_onnxruntime)
set
(
onnxruntime_URL
"https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2"
)
set
(
onnxruntime_URL2
""
)
set
(
onnxruntime_HASH
"SHA256=
f5c19ac1fc6a61c78a231a41df10aede2586665ab397bdc3f007eb8d2c8d4a19
"
)
set
(
onnxruntime_HASH
"SHA256=
c809a8510a89b8b37ae7d563c39229db22bac8fbefcbfe5c81a60b367d065b1b
"
)
endif
()
endif
()
# After downloading, it contains:
...
...
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.cpp
0 → 100644
查看文件 @
0abd7ce
// NonStreamingSpeechRecognition.cpp : Defines the class behaviors for the
// application.
//
// clang-format off
#include "pch.h"
#include "framework.h"
#include "NonStreamingSpeechRecognitionDlg.h"
#include "NonStreamingSpeechRecognition.h"
// clang-format on
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// CNonStreamingSpeechRecognitionApp
// Message map of the application object (base class: CWinApp).
// The only entry routes the ID_HELP command to the stock CWinApp::OnHelp
// handler.
BEGIN_MESSAGE_MAP
(
CNonStreamingSpeechRecognitionApp
,
CWinApp
)
ON_COMMAND
(
ID_HELP
,
&
CWinApp
::
OnHelp
)
END_MESSAGE_MAP
()
// CNonStreamingSpeechRecognitionApp construction
// Default constructor. The body is intentionally empty: as the note inside
// says, significant start-up work belongs in InitInstance().
CNonStreamingSpeechRecognitionApp
::
CNonStreamingSpeechRecognitionApp
()
{
// TODO: add construction code here,
// Place all significant initialization in InitInstance
}
// The one and only CNonStreamingSpeechRecognitionApp object
// Defined here at namespace scope; NonStreamingSpeechRecognition.h exposes it
// to other translation units through an `extern` declaration.
CNonStreamingSpeechRecognitionApp
theApp
;
// CNonStreamingSpeechRecognitionApp initialization
// Application start-up: shows the main dialog modally and, once it has been
// dismissed, tears everything down again.
//
// Returns FALSE unconditionally so that the framework exits instead of
// starting the application's message pump after the dialog has closed.
BOOL CNonStreamingSpeechRecognitionApp::InitInstance() {
  CWinApp::InitInstance();

  // Create the shell manager, in case the dialog contains any shell tree view
  // or shell list view controls.
  CShellManager *shell_manager = new CShellManager;

  // Activate the "Windows Native" visual manager so that MFC controls are
  // themed.
  CMFCVisualManager::SetDefaultManager(
      RUNTIME_CLASS(CMFCVisualManagerWindows));

  // Standard initialization.
  // Registry key under which the settings are stored.
  // TODO: You should modify this string to be something appropriate
  // such as the name of your company or organization
  SetRegistryKey(_T("Local AppWizard-Generated Applications"));

  CNonStreamingSpeechRecognitionDlg main_dialog;
  m_pMainWnd = &main_dialog;

  const INT_PTR dialog_result = main_dialog.DoModal();
  switch (dialog_result) {
    case IDOK:
      // TODO: Place code here to handle when the dialog is
      //  dismissed with OK
      break;

    case IDCANCEL:
      // TODO: Place code here to handle when the dialog is
      //  dismissed with Cancel
      break;

    case -1:
      // DoModal() could not create the dialog at all.
      TRACE(traceAppMsg, 0,
            "Warning: dialog creation failed, so application is terminating "
            "unexpectedly.\n");
      TRACE(traceAppMsg, 0,
            "Warning: if you are using MFC controls on the dialog, you cannot "
            "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
      break;

    default:
      break;
  }

  // Delete the shell manager created above (deleting a null pointer is a
  // no-op, so no explicit check is required).
  delete shell_manager;

#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  ControlBarCleanUp();
#endif

  // Since the dialog has been closed, return FALSE so that we exit the
  // application, rather than start the application's message pump.
  return FALSE;
}
...
...
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.h
0 → 100644
查看文件 @
0abd7ce
// NonStreamingSpeechRecognition.h : main header file for the
// NonStreamingSpeechRecognition application
//
#pragma once
#ifndef __AFXWIN_H__
#error "include 'pch.h' before including this file for PCH"
#endif
#include "resource.h" // main symbols
// CNonStreamingSpeechRecognitionApp:
// See NonStreamingSpeechRecognition.cpp for the implementation of this class
//
// Application class of the non-streaming speech recognition MFC example.
// It derives from CWinApp, overrides InitInstance() and declares a message
// map (DECLARE_MESSAGE_MAP).
class
CNonStreamingSpeechRecognitionApp
:
public
CWinApp
{
public
:
// Default constructor.
CNonStreamingSpeechRecognitionApp
();
// Overrides
public:
// Application start-up hook; returns a BOOL to the framework.
virtual
BOOL
InitInstance
();
// Implementation
DECLARE_MESSAGE_MAP
()
};
extern
CNonStreamingSpeechRecognitionApp
theApp
;
...
...
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.rc
0 → 100644
查看文件 @
0abd7ce
B
// Microsoft Visual C++ generated resource script.
...
...
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj
0 → 100644
查看文件 @
0abd7ce
<?xml version="1.0" encoding="utf-8"?>
<Project
DefaultTargets=
"Build"
xmlns=
"http://schemas.microsoft.com/developer/msbuild/2003"
>
<ItemGroup
Label=
"ProjectConfigurations"
>
<ProjectConfiguration
Include=
"Debug|Win32"
>
<Configuration>
Debug
</Configuration>
<Platform>
Win32
</Platform>
</ProjectConfiguration>
<ProjectConfiguration
Include=
"Release|Win32"
>
<Configuration>
Release
</Configuration>
<Platform>
Win32
</Platform>
</ProjectConfiguration>
<ProjectConfiguration
Include=
"Debug|x64"
>
<Configuration>
Debug
</Configuration>
<Platform>
x64
</Platform>
</ProjectConfiguration>
<ProjectConfiguration
Include=
"Release|x64"
>
<Configuration>
Release
</Configuration>
<Platform>
x64
</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup
Label=
"Globals"
>
<VCProjectVersion>
17.0
</VCProjectVersion>
<ProjectGuid>
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}
</ProjectGuid>
<Keyword>
MFCProj
</Keyword>
<RootNamespace>
NonStreamingSpeechRecognition
</RootNamespace>
<WindowsTargetPlatformVersion>
10.0
</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import
Project=
"$(VCTargetsPath)\Microsoft.Cpp.Default.props"
/>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
Label=
"Configuration"
>
<ConfigurationType>
Application
</ConfigurationType>
<UseDebugLibraries>
true
</UseDebugLibraries>
<PlatformToolset>
v143
</PlatformToolset>
<CharacterSet>
Unicode
</CharacterSet>
<UseOfMfc>
Static
</UseOfMfc>
</PropertyGroup>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
Label=
"Configuration"
>
<ConfigurationType>
Application
</ConfigurationType>
<UseDebugLibraries>
false
</UseDebugLibraries>
<PlatformToolset>
v143
</PlatformToolset>
<WholeProgramOptimization>
true
</WholeProgramOptimization>
<CharacterSet>
Unicode
</CharacterSet>
<UseOfMfc>
Static
</UseOfMfc>
</PropertyGroup>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
Label=
"Configuration"
>
<ConfigurationType>
Application
</ConfigurationType>
<UseDebugLibraries>
true
</UseDebugLibraries>
<PlatformToolset>
v143
</PlatformToolset>
<CharacterSet>
Unicode
</CharacterSet>
<UseOfMfc>
Static
</UseOfMfc>
</PropertyGroup>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
Label=
"Configuration"
>
<ConfigurationType>
Application
</ConfigurationType>
<UseDebugLibraries>
false
</UseDebugLibraries>
<PlatformToolset>
v143
</PlatformToolset>
<WholeProgramOptimization>
true
</WholeProgramOptimization>
<CharacterSet>
Unicode
</CharacterSet>
<UseOfMfc>
Static
</UseOfMfc>
</PropertyGroup>
<Import
Project=
"$(VCTargetsPath)\Microsoft.Cpp.props"
/>
<ImportGroup
Label=
"ExtensionSettings"
>
</ImportGroup>
<ImportGroup
Label=
"Shared"
>
</ImportGroup>
<ImportGroup
Label=
"PropertySheets"
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
<Import
Project=
"$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props"
Condition=
"exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')"
Label=
"LocalAppDataPlatform"
/>
<Import
Project=
"sherpa-onnx-deps.props"
/>
</ImportGroup>
<ImportGroup
Label=
"PropertySheets"
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
<Import
Project=
"$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props"
Condition=
"exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')"
Label=
"LocalAppDataPlatform"
/>
<Import
Project=
"sherpa-onnx-deps.props"
/>
</ImportGroup>
<ImportGroup
Label=
"PropertySheets"
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
<Import
Project=
"$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props"
Condition=
"exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')"
Label=
"LocalAppDataPlatform"
/>
<Import
Project=
"sherpa-onnx-deps.props"
/>
</ImportGroup>
<ImportGroup
Label=
"PropertySheets"
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
<Import
Project=
"$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props"
Condition=
"exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')"
Label=
"LocalAppDataPlatform"
/>
<Import
Project=
"sherpa-onnx-deps.props"
/>
</ImportGroup>
<PropertyGroup
Label=
"UserMacros"
/>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
<LinkIncremental>
false
</LinkIncremental>
</PropertyGroup>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
<LinkIncremental>
true
</LinkIncremental>
</PropertyGroup>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
<LinkIncremental>
true
</LinkIncremental>
</PropertyGroup>
<PropertyGroup
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
<LinkIncremental>
false
</LinkIncremental>
</PropertyGroup>
<ItemDefinitionGroup
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
<ClCompile>
<PrecompiledHeader>
Use
</PrecompiledHeader>
<WarningLevel>
Level3
</WarningLevel>
<FunctionLevelLinking>
true
</FunctionLevelLinking>
<IntrinsicFunctions>
true
</IntrinsicFunctions>
<SDLCheck>
true
</SDLCheck>
<PreprocessorDefinitions>
_WINDOWS;NDEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<PrecompiledHeaderFile>
pch.h
</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>
Windows
</SubSystem>
<EnableCOMDATFolding>
true
</EnableCOMDATFolding>
<OptimizeReferences>
true
</OptimizeReferences>
</Link>
<Midl>
<MkTypLibCompatible>
false
</MkTypLibCompatible>
<ValidateAllParameters>
true
</ValidateAllParameters>
<PreprocessorDefinitions>
NDEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>
0x0409
</Culture>
<PreprocessorDefinitions>
NDEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<AdditionalIncludeDirectories>
$(IntDir);%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
<ClCompile>
<PrecompiledHeader>
Use
</PrecompiledHeader>
<WarningLevel>
Level3
</WarningLevel>
<SDLCheck>
true
</SDLCheck>
<PreprocessorDefinitions>
WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<PrecompiledHeaderFile>
pch.h
</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>
Windows
</SubSystem>
</Link>
<Midl>
<MkTypLibCompatible>
false
</MkTypLibCompatible>
<ValidateAllParameters>
true
</ValidateAllParameters>
<PreprocessorDefinitions>
_DEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>
0x0409
</Culture>
<PreprocessorDefinitions>
_DEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<AdditionalIncludeDirectories>
$(IntDir);%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
<ClCompile>
<PrecompiledHeader>
Use
</PrecompiledHeader>
<WarningLevel>
Level3
</WarningLevel>
<SDLCheck>
true
</SDLCheck>
<PreprocessorDefinitions>
_WINDOWS;_DEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<PrecompiledHeaderFile>
pch.h
</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>
Windows
</SubSystem>
</Link>
<Midl>
<MkTypLibCompatible>
false
</MkTypLibCompatible>
<ValidateAllParameters>
true
</ValidateAllParameters>
<PreprocessorDefinitions>
_DEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>
0x0409
</Culture>
<PreprocessorDefinitions>
_DEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<AdditionalIncludeDirectories>
$(IntDir);%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
<ClCompile>
<PrecompiledHeader>
Use
</PrecompiledHeader>
<WarningLevel>
Level3
</WarningLevel>
<FunctionLevelLinking>
true
</FunctionLevelLinking>
<IntrinsicFunctions>
true
</IntrinsicFunctions>
<SDLCheck>
true
</SDLCheck>
<PreprocessorDefinitions>
WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<PrecompiledHeaderFile>
pch.h
</PrecompiledHeaderFile>
</ClCompile>
<Link>
<SubSystem>
Windows
</SubSystem>
<EnableCOMDATFolding>
true
</EnableCOMDATFolding>
<OptimizeReferences>
true
</OptimizeReferences>
</Link>
<Midl>
<MkTypLibCompatible>
false
</MkTypLibCompatible>
<ValidateAllParameters>
true
</ValidateAllParameters>
<PreprocessorDefinitions>
NDEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
</Midl>
<ResourceCompile>
<Culture>
0x0409
</Culture>
<PreprocessorDefinitions>
NDEBUG;%(PreprocessorDefinitions)
</PreprocessorDefinitions>
<AdditionalIncludeDirectories>
$(IntDir);%(AdditionalIncludeDirectories)
</AdditionalIncludeDirectories>
</ResourceCompile>
</ItemDefinitionGroup>
<ItemGroup>
<ClInclude
Include=
"framework.h"
/>
<ClInclude
Include=
"NonStreamingSpeechRecognition.h"
/>
<ClInclude
Include=
"NonStreamingSpeechRecognitionDlg.h"
/>
<ClInclude
Include=
"pch.h"
/>
<ClInclude
Include=
"Resource.h"
/>
<ClInclude
Include=
"targetver.h"
/>
</ItemGroup>
<ItemGroup>
<ClCompile
Include=
"NonStreamingSpeechRecognition.cpp"
/>
<ClCompile
Include=
"NonStreamingSpeechRecognitionDlg.cpp"
/>
<ClCompile
Include=
"pch.cpp"
>
<PrecompiledHeader
Condition=
"'$(Configuration)|$(Platform)'=='Release|x64'"
>
Create
</PrecompiledHeader>
<PrecompiledHeader
Condition=
"'$(Configuration)|$(Platform)'=='Debug|Win32'"
>
Create
</PrecompiledHeader>
<PrecompiledHeader
Condition=
"'$(Configuration)|$(Platform)'=='Debug|x64'"
>
Create
</PrecompiledHeader>
<PrecompiledHeader
Condition=
"'$(Configuration)|$(Platform)'=='Release|Win32'"
>
Create
</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile
Include=
"NonStreamingSpeechRecognition.rc"
/>
</ItemGroup>
<ItemGroup>
<None
Include=
"res\NonStreamingSpeechRecognition.rc2"
/>
</ItemGroup>
<ItemGroup>
<Image
Include=
"res\NonStreamingSpeechRecognition.ico"
/>
</ItemGroup>
<Import
Project=
"$(VCTargetsPath)\Microsoft.Cpp.targets"
/>
<ImportGroup
Label=
"ExtensionTargets"
>
</ImportGroup>
</Project>
\ No newline at end of file
...
...
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognition.vcxproj.filters
0 → 100644
查看文件 @
0abd7ce
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<Filter Include="Source Files">
<UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
<Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
</Filter>
<Filter Include="Header Files">
<UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
<Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
</Filter>
<Filter Include="Resource Files">
<UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
<Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
</Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="NonStreamingSpeechRecognition.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="NonStreamingSpeechRecognitionDlg.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="framework.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="targetver.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Resource.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="pch.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="NonStreamingSpeechRecognition.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="pch.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="NonStreamingSpeechRecognition.rc">
<Filter>Resource Files</Filter>
</ResourceCompile>
</ItemGroup>
<ItemGroup>
<None Include="res\NonStreamingSpeechRecognition.rc2">
<Filter>Resource Files</Filter>
</None>
</ItemGroup>
<ItemGroup>
<Image Include="res\NonStreamingSpeechRecognition.ico">
<Filter>Resource Files</Filter>
</Image>
</ItemGroup>
</Project>
\ No newline at end of file
...
...
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.cpp
0 → 100644
查看文件 @
0abd7ce
// NonStreamingSpeechRecognitionDlg.cpp : implementation file
//
// clang-format off
#include "pch.h"
#include "framework.h"
#include "afxdialogex.h"
#include "NonStreamingSpeechRecognition.h"
#include "NonStreamingSpeechRecognitionDlg.h"
// clang-format on
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
// Initializes the PortAudio library.
//
// If Pa_Initialize() reports an error, the error text is written to stderr
// and the process is terminated with exit code -2.
Microphone::Microphone() {
  const PaError status = Pa_Initialize();
  if (status == paNoError) {
    return;
  }

  fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(status));
  exit(-2);
}
// Shuts the PortAudio library down.
//
// A failure of Pa_Terminate() is reported on stderr and otherwise ignored.
// The destructor must not call exit(): doing so from inside a destructor
// skips the cleanup of every object that is still alive, and it is undefined
// behavior if the destructor itself runs as part of exit() processing.
Microphone::~Microphone() {
  const PaError err = Pa_Terminate();
  if (err != paNoError) {
    fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  }
}
// Converts a UTF-8 encoded string to UTF-16.
//
// Based on
// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
//
// @param utf8  The UTF-8 encoded input; may be empty.
// @return The UTF-16 code units, one per wchar_t element. Code points above
//         U+FFFF are emitted as a surrogate pair.
// @throws std::logic_error if the input is not well-formed UTF-8, i.e. on a
//         stray continuation byte, an invalid lead byte, a truncated or
//         malformed multi-byte sequence, an overlong encoding, an encoded
//         surrogate (U+D800..U+DFFF) or a code point above U+10FFFF.
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  static const char *const kNotUtf8 = "not a UTF-8 string";

  std::wstring utf16;
  // Every UTF-8 byte yields at most one UTF-16 code unit.
  utf16.reserve(utf8.size());

  size_t i = 0;
  while (i < utf8.size()) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i++]);

    unsigned long code_point = 0;
    size_t num_continuation = 0;
    // Smallest code point that legitimately needs this sequence length;
    // anything below it is an overlong encoding.
    unsigned long min_code_point = 0;

    if (lead <= 0x7F) {
      code_point = lead;
    } else if (lead <= 0xBF) {
      // A continuation byte cannot start a sequence.
      throw std::logic_error(kNotUtf8);
    } else if (lead <= 0xDF) {
      code_point = lead & 0x1F;
      num_continuation = 1;
      min_code_point = 0x80;
    } else if (lead <= 0xEF) {
      code_point = lead & 0x0F;
      num_continuation = 2;
      min_code_point = 0x800;
    } else if (lead <= 0xF7) {
      code_point = lead & 0x07;
      num_continuation = 3;
      min_code_point = 0x10000;
    } else {
      throw std::logic_error(kNotUtf8);
    }

    for (size_t j = 0; j < num_continuation; ++j) {
      if (i == utf8.size()) {
        throw std::logic_error(kNotUtf8);  // truncated sequence
      }

      const unsigned char cont = static_cast<unsigned char>(utf8[i++]);
      if (cont < 0x80 || cont > 0xBF) {
        throw std::logic_error(kNotUtf8);
      }

      code_point = (code_point << 6) + (cont & 0x3F);
    }

    if (code_point < min_code_point) {
      throw std::logic_error(kNotUtf8);  // overlong encoding
    }

    if (code_point >= 0xD800 && code_point <= 0xDFFF) {
      throw std::logic_error(kNotUtf8);  // surrogates are not valid in UTF-8
    }

    if (code_point > 0x10FFFF) {
      throw std::logic_error(kNotUtf8);
    }

    if (code_point <= 0xFFFF) {
      utf16 += static_cast<wchar_t>(code_point);
    } else {
      // Split into a high/low surrogate pair.
      const unsigned long offset = code_point - 0x10000;
      utf16 += static_cast<wchar_t>((offset >> 10) + 0xD800);
      utf16 += static_cast<wchar_t>((offset & 0x3FF) + 0xDC00);
    }
  }

  return utf16;
}
// Joins recognition results into one numbered, multi-line string.
//
// Each entry is rendered as "<index>: <text>" with indexes starting at 0, and
// entries are separated by "\r\n" (there is no trailing separator).
//
// @param results  The texts to join; may be empty.
// @return The joined string, or an empty string if `results` is empty.
static std::string Cat(const std::vector<std::string> &results) {
  std::ostringstream os;
  const char *sep = "";

  // size_t index: avoids comparing a signed int against results.size().
  for (size_t i = 0; i != results.size(); ++i) {
    os << sep << i << ": " << results[i];
    sep = "\r\n";
  }

  return os.str();
}
// CNonStreamingSpeechRecognitionDlg dialog
// Constructs the dialog from the IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG
// template and loads the IDR_MAINFRAME icon into m_hIcon.
//
// @param pParent  Parent window forwarded to CDialogEx.
CNonStreamingSpeechRecognitionDlg::CNonStreamingSpeechRecognitionDlg(
    CWnd *pParent /*=nullptr*/)
    : CDialogEx(IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG, pParent) {
  CWinApp *app = AfxGetApp();
  m_hIcon = app->LoadIcon(IDR_MAINFRAME);
}
// Releases the resources owned by the dialog.
//
// If the dialog is destroyed while a recording is still in progress, the
// PortAudio stream is closed first so that RecordCallback() is no longer
// invoked with a pointer to this (soon to be destroyed) object. Afterwards
// the offline recognizer, if one was created, is destroyed.
CNonStreamingSpeechRecognitionDlg::~CNonStreamingSpeechRecognitionDlg() {
  if (started_ && pa_stream_) {
    started_ = false;
    // Best effort: there is nothing useful to do with an error at this point.
    Pa_CloseStream(pa_stream_);
    pa_stream_ = nullptr;
  }

  if (recognizer_) {
    DestroyOfflineRecognizer(recognizer_);
    recognizer_ = nullptr;
  }
}
// Dialog data exchange: after delegating to CDialogEx, binds the control
// with ID IDC_EDIT1 to my_text_ and the control with ID IDOK to my_btn_.
void
CNonStreamingSpeechRecognitionDlg
::
DoDataExchange
(
CDataExchange
*
pDX
)
{
CDialogEx
::
DoDataExchange
(
pDX
);
DDX_Control
(
pDX
,
IDC_EDIT1
,
my_text_
);
DDX_Control
(
pDX
,
IDOK
,
my_btn_
);
}
// Message map of the dialog (base class: CDialogEx). It handles WM_PAINT,
// WM_QUERYDRAGICON and clicks on the IDOK control, which are routed to
// OnBnClickedOk().
BEGIN_MESSAGE_MAP
(
CNonStreamingSpeechRecognitionDlg
,
CDialogEx
)
ON_WM_PAINT
()
ON_WM_QUERYDRAGICON
()
ON_BN_CLICKED
(
IDOK
,
&
CNonStreamingSpeechRecognitionDlg
::
OnBnClickedOk
)
END_MESSAGE_MAP
()
// CNonStreamingSpeechRecognitionDlg message handlers
// Dialog initialization: runs the base-class initialization, installs
// m_hIcon as both the big and the small icon, and calls InitMicrophone().
// Always returns TRUE.
BOOL
CNonStreamingSpeechRecognitionDlg
::
OnInitDialog
()
{
CDialogEx
::
OnInitDialog
();
// Set the icon for this dialog.  The framework does this automatically
//  when the application's main window is not a dialog
SetIcon
(
m_hIcon
,
TRUE
);
// Set big icon
SetIcon
(
m_hIcon
,
FALSE
);
// Set small icon
// TODO: Add extra initialization here
InitMicrophone
();
return
TRUE
;
// return TRUE  unless you set the focus to a control
}
// If you add a minimize button to your dialog, you will need the code below
// to draw the icon. For MFC applications using the document/view model,
// this is automatically done for you by the framework.
void
CNonStreamingSpeechRecognitionDlg
::
OnPaint
()
{
if
(
IsIconic
())
{
CPaintDC
dc
(
this
);
// device context for painting
SendMessage
(
WM_ICONERASEBKGND
,
reinterpret_cast
<
WPARAM
>
(
dc
.
GetSafeHdc
()),
0
);
// Center icon in client rectangle
int
cxIcon
=
GetSystemMetrics
(
SM_CXICON
);
int
cyIcon
=
GetSystemMetrics
(
SM_CYICON
);
CRect
rect
;
GetClientRect
(
&
rect
);
int
x
=
(
rect
.
Width
()
-
cxIcon
+
1
)
/
2
;
int
y
=
(
rect
.
Height
()
-
cyIcon
+
1
)
/
2
;
// Draw the icon
dc
.
DrawIcon
(
x
,
y
,
m_hIcon
);
}
else
{
CDialogEx
::
OnPaint
();
}
}
// The system calls this function to obtain the cursor to display while the
// user drags the minimized window.
// WM_QUERYDRAGICON handler: returns m_hIcon converted to an HCURSOR.
HCURSOR
CNonStreamingSpeechRecognitionDlg
::
OnQueryDragIcon
()
{
return
static_cast
<
HCURSOR
>
(
m_hIcon
);
}
// PortAudio stream callback: appends the captured samples to the dialog's
// sample buffer.
//
// @param input_buffer       Captured audio, read as `frames_per_buffer` float
//                           samples (OnBnClickedOk() opens the stream with
//                           one channel of paFloat32).
// @param frames_per_buffer  Number of frames available in `input_buffer`.
// @param user_data          The CNonStreamingSpeechRecognitionDlg that was
//                           passed to Pa_OpenStream().
// @return paContinue while the dialog's started_ flag is set, paComplete
//         otherwise.
static int32_t RecordCallback(const void *input_buffer,
                              void * /*output_buffer*/,
                              unsigned long frames_per_buffer,  // NOLINT
                              const PaStreamCallbackTimeInfo * /*time_info*/,
                              PaStreamCallbackFlags /*status_flags*/,
                              void *user_data) {
  auto *self = static_cast<CNonStreamingSpeechRecognitionDlg *>(user_data);

  const float *first = static_cast<const float *>(input_buffer);
  const float *last = first + frames_per_buffer;

  auto &buffer = self->samples_;
  buffer.insert(buffer.end(), first, last);

  if (self->started_) {
    return paContinue;
  }
  return paComplete;
}
void
CNonStreamingSpeechRecognitionDlg
::
OnBnClickedOk
()
{
if
(
!
recognizer_
)
{
AppendLineToMultilineEditCtrl
(
"Creating recognizer..."
);
AppendLineToMultilineEditCtrl
(
"It will take several seconds. Please wait"
);
InitRecognizer
();
if
(
!
recognizer_
)
{
// failed to create the recognizer
return
;
}
AppendLineToMultilineEditCtrl
(
"Recognizer created!"
);
}
if
(
!
started_
)
{
samples_
.
clear
();
started_
=
true
;
PaStreamParameters
param
;
param
.
device
=
Pa_GetDefaultInputDevice
();
const
PaDeviceInfo
*
info
=
Pa_GetDeviceInfo
(
param
.
device
);
param
.
channelCount
=
1
;
param
.
sampleFormat
=
paFloat32
;
param
.
suggestedLatency
=
info
->
defaultLowInputLatency
;
param
.
hostApiSpecificStreamInfo
=
nullptr
;
float
sample_rate
=
config_
.
feat_config
.
sample_rate
;
pa_stream_
=
nullptr
;
PaError
err
=
Pa_OpenStream
(
&
pa_stream_
,
&
param
,
nullptr
,
/* &outputParameters, */
sample_rate
,
0
,
// frames per buffer
paClipOff
,
// we won't output out of range samples
// so don't bother clipping them
RecordCallback
,
this
);
if
(
err
!=
paNoError
)
{
AppendLineToMultilineEditCtrl
(
std
::
string
(
"PortAudio error: "
)
+
Pa_GetErrorText
(
err
));
my_btn_
.
EnableWindow
(
FALSE
);
return
;
}
err
=
Pa_StartStream
(
pa_stream_
);
if
(
err
!=
paNoError
)
{
AppendLineToMultilineEditCtrl
(
std
::
string
(
"PortAudio error: "
)
+
Pa_GetErrorText
(
err
));
my_btn_
.
EnableWindow
(
FALSE
);
return
;
}
AppendLineToMultilineEditCtrl
(
"
\r\n
Started! Please speak and click stop.
\r\n
"
);
my_btn_
.
SetWindowText
(
_T
(
"Stop"
));
}
else
{
started_
=
false
;
Pa_Sleep
(
200
);
// sleep for 200ms
if
(
pa_stream_
)
{
PaError
err
=
Pa_CloseStream
(
pa_stream_
);
if
(
err
!=
paNoError
)
{
AppendLineToMultilineEditCtrl
(
std
::
string
(
"PortAudio error: "
)
+
Pa_GetErrorText
(
err
));
my_btn_
.
EnableWindow
(
FALSE
);
return
;
}
}
pa_stream_
=
nullptr
;
SherpaOnnxOfflineStream
*
stream
=
CreateOfflineStream
(
recognizer_
);
AcceptWaveformOffline
(
stream
,
config_
.
feat_config
.
sample_rate
,
samples_
.
data
(),
samples_
.
size
());
DecodeOfflineStream
(
recognizer_
,
stream
);
SherpaOnnxOfflineRecognizerResult
*
r
=
GetOfflineStreamResult
(
stream
);
results_
.
emplace_back
(
r
->
text
);
auto
str
=
Utf8ToUtf16
(
Cat
(
results_
).
c_str
());
my_text_
.
SetWindowText
(
str
.
c_str
());
my_text_
.
SetFocus
();
my_text_
.
SetSel
(
-
1
);
DestroyOfflineRecognizerResult
(
r
);
DestroyOfflineStream
(
stream
);
// AfxMessageBox("Stopped", MB_OK);
my_btn_
.
SetWindowText
(
_T
(
"Start"
));
AppendLineToMultilineEditCtrl
(
"
\r\n
Stopped. Please click start and speak"
);
}
}
void
CNonStreamingSpeechRecognitionDlg
::
InitMicrophone
()
{
int
default_device
=
Pa_GetDefaultInputDevice
();
int
device_count
=
Pa_GetDeviceCount
();
if
(
default_device
==
paNoDevice
)
{
// CString str;
// str.Format(_T("No default input device found!"));
// AfxMessageBox(str, MB_OK | MB_ICONSTOP);
// exit(-1);
AppendLineToMultilineEditCtrl
(
"No default input device found!"
);
my_btn_
.
EnableWindow
(
FALSE
);
return
;
}
AppendLineToMultilineEditCtrl
(
std
::
string
(
"Selected device "
)
+
Pa_GetDeviceInfo
(
default_device
)
->
name
);
}
// Reports whether `filename` can be opened for reading.
//
// @param filename  Path of the file to probe.
// @return true if an input file stream opened on `filename` is in a good
//         state, false otherwise.
bool CNonStreamingSpeechRecognitionDlg::Exists(const std::string &filename) {
  return std::ifstream(filename).good();
}
void
CNonStreamingSpeechRecognitionDlg
::
ShowInitRecognizerHelpMessage
()
{
my_btn_
.
EnableWindow
(
FALSE
);
std
::
string
msg
=
"
\r\n
Please go to
\r\n
"
"https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
"
\r\n
"
;
msg
+=
"to download a non-streaming model, i.e., an offline model.
\r\n
"
;
msg
+=
"You need to rename them to encoder.onnx, decoder.onnx, and "
"joiner.onnx correspoondingly.
\r\n\r\n
"
;
msg
+=
"It supports both transducer models and paraformer models.
\r\n\r\n
"
;
msg
+=
"We give two examples below to show you how to download models
\r\n\r\n
"
;
msg
+=
"(1) Transducer
\r\n\r\n
"
;
msg
+=
"We use "
"https://huggingface.co/pkufool/"
"icefall-asr-zipformer-wenetspeech-20230615 below
\r\n
"
;
msg
+=
"wget "
"https://huggingface.co/pkufool/"
"icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
"encoder-epoch-12-avg-4.onnx
\r\n
"
;
msg
+=
"wget "
"https://huggingface.co/pkufool/"
"icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
"decoder-epoch-12-avg-4.onnx
\r\n
"
;
msg
+=
"wget "
"https://huggingface.co/pkufool/"
"icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
"joiner-epoch-12-avg-4.onnx
\r\n
"
;
msg
+=
"
\r\n
Now rename them
\r\n
"
;
msg
+=
"mv encoder-epoch-12-avg-4.onnx encoder.onnx
\r\n
"
;
msg
+=
"mv decoder-epoch-12-avg-4.onnx decoder.onnx
\r\n
"
;
msg
+=
"mv joiner-epoch-12-avg-4.onnx joiner.onnx
\r\n\r\n
"
;
msg
+=
"(2) Paraformer
\r\n\r\n
"
;
msg
+=
"wget "
"https://huggingface.co/csukuangfj/"
"sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/model.onnx
\r\n
"
;
msg
+=
"wget "
"https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/"
"resolve/main/tokens.txt
\r\n\r\n
"
;
msg
+=
"
\r\n
Now rename them
\r\n
"
;
msg
+=
"mv model.onnx paraformer.onnx
\r\n
"
;
msg
+=
"
\r\n
"
;
msg
+=
"That's it!
\r\n
"
;
AppendLineToMultilineEditCtrl
(
msg
);
}
// Creates the offline recognizer from a paraformer model in the current
// directory.
//
// "./paraformer.int8.onnx" is preferred over "./paraformer.onnx" when it
// exists; "./tokens.txt" is always required. If a required file is missing,
// the problem is reported in the edit control, the download instructions are
// shown and recognizer_ is left untouched.
void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
  std::string paraformer = "./paraformer.onnx";
  std::string tokens = "./tokens.txt";

  bool is_ok = true;

  if (Exists("./paraformer.int8.onnx")) {
    paraformer = "./paraformer.int8.onnx";
  } else if (!Exists(paraformer)) {
    std::string msg = paraformer + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(tokens)) {
    std::string msg = tokens + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  memset(&config_, 0, sizeof(config_));

  config_.feat_config.sample_rate = 16000;
  config_.feat_config.feature_dim = 80;

  config_.model_config.paraformer.model = paraformer.c_str();
  config_.model_config.tokens = tokens.c_str();
  config_.model_config.num_threads = 1;
  config_.model_config.debug = 1;

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = CreateOfflineRecognizer(&config_);

  // config_ is a member and outlives this call, whereas `paraformer` and
  // `tokens` are destroyed on return. Do not leave pointers into them behind.
  // NOTE(review): this relies on CreateOfflineRecognizer() not retaining the
  // pointers it was given; the original code made the same assumption.
  config_.model_config.paraformer.model = "";
  config_.model_config.tokens = "";
}
// Creates the offline recognizer from model files in the current directory.
//
// If "./paraformer.onnx" or "./paraformer.int8.onnx" exists, the work is
// delegated to InitParaformer(). Otherwise a transducer model is assumed,
// which needs "./encoder.onnx", "./decoder.onnx", "./joiner.onnx" and
// "./tokens.txt". Every missing file is reported in the edit control; if any
// file is missing, the download instructions are shown and recognizer_ is
// left untouched.
void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
  if (Exists("./paraformer.onnx") || Exists("./paraformer.int8.onnx")) {
    InitParaformer();
    return;
  }

  // assume it is transducer
  std::string encoder = "./encoder.onnx";
  std::string decoder = "./decoder.onnx";
  std::string joiner = "./joiner.onnx";
  std::string tokens = "./tokens.txt";

  bool is_ok = true;

  if (!Exists(encoder)) {
    std::string msg = encoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(decoder)) {
    std::string msg = decoder + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(joiner)) {
    std::string msg = joiner + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!Exists(tokens)) {
    std::string msg = tokens + " does not exist!";
    AppendLineToMultilineEditCtrl(msg);
    is_ok = false;
  }

  if (!is_ok) {
    ShowInitRecognizerHelpMessage();
    return;
  }

  memset(&config_, 0, sizeof(config_));

  config_.feat_config.sample_rate = 16000;
  config_.feat_config.feature_dim = 80;

  config_.model_config.transducer.encoder = encoder.c_str();
  config_.model_config.transducer.decoder = decoder.c_str();
  config_.model_config.transducer.joiner = joiner.c_str();
  config_.model_config.tokens = tokens.c_str();
  config_.model_config.num_threads = 1;
  config_.model_config.debug = 0;

  config_.decoding_method = "greedy_search";
  config_.max_active_paths = 4;

  recognizer_ = CreateOfflineRecognizer(&config_);

  // config_ is a member and outlives this call, whereas the path strings
  // above are destroyed on return. Do not leave pointers into them behind.
  // NOTE(review): this relies on CreateOfflineRecognizer() not retaining the
  // pointers it was given; the original code made the same assumption.
  config_.model_config.transducer.encoder = "";
  config_.model_config.transducer.decoder = "";
  config_.model_config.transducer.joiner = "";
  config_.model_config.tokens = "";
}
// Appends `s` after the text currently held by the edit control my_text_.
// The string is converted with Utf8ToUtf16() before it is handed to the
// control.
void CNonStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
    const std::string &s) {
  // An empty selection positioned at the current text length serves as the
  // insertion point for ReplaceSel() below.
  const int end_pos = my_text_.GetWindowTextLength();
  my_text_.SetSel(end_pos, end_pos);

  const std::wstring wide_text = Utf8ToUtf16(s);
  my_text_.ReplaceSel(wide_text.c_str());
}
// Appends `s` to the edit control, preceded by a CR/LF pair so that it starts
// on a new line.
void CNonStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl(
    const std::string &s) {
  std::string line = "\r\n";
  line += s;
  AppendTextToEditCtrl(line);
}
...
...
mfc-examples/NonStreamingSpeechRecognition/NonStreamingSpeechRecognitionDlg.h
0 → 100644
查看文件 @
0abd7ce
// NonStreamingSpeechRecognitionDlg.h : header file
//
#pragma once
#include <string>
#include <vector>
#include "portaudio.h"
#include "sherpa-onnx/c-api/c-api.h"
// Helper class that exposes only a default constructor and a destructor.
// NOTE(review): presumably the constructor/destructor pair initializes and
// shuts down the audio backend (portaudio.h is included above) -- the
// definitions are not in this header; confirm against the .cpp file.
class
Microphone
{
public
:
Microphone
();
~
Microphone
();
};
// CNonStreamingSpeechRecognitionDlg dialog
// Main dialog of the non-streaming speech recognition example. It owns the
// offline recognizer handle and its configuration, the audio stream handle,
// the dialog's button and edit controls, and the buffers for recorded samples
// and recognition results.
class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
  // Construction
 public:
  CNonStreamingSpeechRecognitionDlg(
      CWnd *pParent = nullptr);  // standard constructor
  ~CNonStreamingSpeechRecognitionDlg();

  // Dialog Data
#ifdef AFX_DESIGN_TIME
  enum { IDD = IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG };
#endif

 protected:
  virtual void DoDataExchange(CDataExchange *pDX);  // DDX/DDV support

  // Implementation
 protected:
  HICON m_hIcon;

  // Generated message map functions
  virtual BOOL OnInitDialog();
  afx_msg void OnPaint();
  afx_msg HCURSOR OnQueryDragIcon();
  DECLARE_MESSAGE_MAP()

 public:
  afx_msg void OnBnClickedOk();
  int RunThread();

 private:
  // NOTE: the declaration order of the data members is kept as-is because it
  // determines their construction and destruction order.
  Microphone mic_;

  SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;

  // Value-initialized so that the struct never holds indeterminate values:
  // InitRecognizer()/InitParaformer() return before filling it in when the
  // model files are missing.
  SherpaOnnxOfflineRecognizerConfig config_{};

  PaStream *pa_stream_ = nullptr;

  CButton my_btn_;
  CEdit my_text_;

  std::vector<std::string> results_;

 public:
  // NOTE(review): these two are public, presumably so that code outside the
  // class (e.g. an audio callback) can access them -- confirm in the .cpp.
  bool started_ = false;
  std::vector<float> samples_;

 private:
  // Appends `s` at the end of the text in my_text_.
  void AppendTextToEditCtrl(const std::string &s);

  // Appends "\r\n" followed by `s` at the end of the text in my_text_.
  void AppendLineToMultilineEditCtrl(const std::string &s);

  void InitMicrophone();

  bool Exists(const std::string &filename);

  // Creates recognizer_ from model files in the current directory; delegates
  // to InitParaformer() when a paraformer model file is present.
  void InitRecognizer();
  void InitParaformer();

  // Shows instructions for downloading and renaming the model files.
  void ShowInitRecognizerHelpMessage();
};
...
...
mfc-examples/NonStreamingSpeechRecognition/Resource.h
0 → 100644
查看文件 @
0abd7ce
//{{NO_DEPENDENCIES}}
// Microsoft Visual C++ generated include file.
// Used by NonStreamingSpeechRecognition.rc
//
#define IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG 102
#define IDR_MAINFRAME 128
#define IDC_EDIT1 1000
// Next default values for new objects
//
#ifdef APSTUDIO_INVOKED
#ifndef APSTUDIO_READONLY_SYMBOLS
#define _APS_NEXT_RESOURCE_VALUE 130
#define _APS_NEXT_COMMAND_VALUE 32771
#define _APS_NEXT_CONTROL_VALUE 1001
#define _APS_NEXT_SYMED_VALUE 101
#endif
#endif
...
...
mfc-examples/NonStreamingSpeechRecognition/framework.h
0 → 100644
查看文件 @
0abd7ce
#pragma once
#ifndef VC_EXTRALEAN
#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
#endif
#include "targetver.h"
#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be
// explicit
// turns off MFC's hiding of some common and often safely ignored warning
// messages
#define _AFX_ALL_WARNINGS
#include <afxext.h> // MFC extensions
#include <afxwin.h> // MFC core and standard components
#ifndef _AFX_NO_OLE_SUPPORT
#include <afxdtctl.h> // MFC support for Internet Explorer 4 Common Controls
#endif
#ifndef _AFX_NO_AFXCMN_SUPPORT
#include <afxcmn.h> // MFC support for Windows Common Controls
#endif // _AFX_NO_AFXCMN_SUPPORT
#include <afxcontrolbars.h> // MFC support for ribbons and control bars
...
...
mfc-examples/NonStreamingSpeechRecognition/pch.cpp
0 → 100644
查看文件 @
0abd7ce
// pch.cpp: source file corresponding to the pre-compiled header
#include "pch.h"
// When you are using pre-compiled headers, this source file is necessary for
// compilation to succeed.
...
...
mfc-examples/NonStreamingSpeechRecognition/pch.h
0 → 100644
查看文件 @
0abd7ce
// pch.h: This is a precompiled header file.
// Files listed below are compiled only once, improving build performance for
// future builds. This also affects IntelliSense performance, including code
// completion and many code browsing features. However, files listed here are
// ALL re-compiled if any one of them is updated between builds. Do not add
// files here that you will be updating frequently as this negates the
// performance advantage.
#ifndef PCH_H
#define PCH_H
// add headers that you want to pre-compile here
#include "framework.h"
#endif // PCH_H
...
...
mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.ico
0 → 100644
查看文件 @
0abd7ce
不能预览此文件类型
mfc-examples/NonStreamingSpeechRecognition/res/NonStreamingSpeechRecognition.rc2
0 → 100644
查看文件 @
0abd7ce
B
//
...
...
mfc-examples/NonStreamingSpeechRecognition/sherpa-onnx-deps.props
0 → 100644
查看文件 @
0abd7ce
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup Label="UserMacros" />
<PropertyGroup>
<SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
<SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
<SherpaOnnxLibraries>
sherpa-onnx-portaudio_static.lib;
sherpa-onnx-c-api.lib;
sherpa-onnx-core.lib;
kaldi-native-fbank-core.lib;
absl_base.lib;
absl_city.lib;
absl_hash.lib;
absl_low_level_hash.lib;
absl_raw_hash_set.lib;
absl_raw_logging_internal.lib;
absl_throw_delegate.lib;
clog.lib;
cpuinfo.lib;
flatbuffers.lib;
libprotobuf-lite.lib;
onnx.lib;
onnx_proto.lib;
onnxruntime_common.lib;
onnxruntime_flatbuffers.lib;
onnxruntime_framework.lib;
onnxruntime_graph.lib;
onnxruntime_mlas.lib;
onnxruntime_optimizer.lib;
onnxruntime_providers.lib;
onnxruntime_session.lib;
onnxruntime_util.lib;
re2.lib;
</SherpaOnnxLibraries>
</PropertyGroup>
<ItemDefinitionGroup>
<ClCompile>
<AdditionalIncludeDirectories>
$(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
$(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemGroup />
</Project>
...
...
mfc-examples/NonStreamingSpeechRecognition/targetver.h
0 → 100644
查看文件 @
0abd7ce
#pragma once
// Including SDKDDKVer.h defines the highest available Windows platform.
// If you wish to build your application for a previous Windows platform,
// include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish
// to support before including SDKDDKVer.h.
#include <SDKDDKVer.h>
...
...
mfc-examples/README.md
查看文件 @
0abd7ce
...
...
@@ -3,11 +3,19 @@
This directory contains examples showing how to use Next-gen Kaldi in MFC
for speech recognition.
Caution: You need to use Windows and install Visual Studio in order to run it.
Caution: You need to use Windows and install Visual Studio 2022 in order to
compile it.
Hint: If you don't want to install Visual Studio, see below for
how to download a pre-compiled
`exe`
.
We use the bash script below to demonstrate how to use it. Please change
the commands accordingly for Windows.
## Streaming speech recognition
## How to compile
First, we need to compile sherpa-onnx:
```
bash
mkdir -p
$HOME
/open-source
...
...
@@ -19,7 +27,6 @@ mkdir build
cmake -DCMAKE_BUILD_TYPE
=
Release -DBUILD_SHARED_LIBS
=
OFF -DCMAKE_INSTALL_PREFIX
=
./install ..
cmake --build . --config Release --target install
cd
../mfc-examples
msbuild ./mfc-examples.sln /property:Configuration
=
Release /property:Platform
=
x64
...
...
@@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6
# now run the program
./x64/Release/StreamingSpeechRecognition.exe
./x64/Release/NonStreamingSpeechRecognition.exe
```
Note that we also need to download pre-trained models. Please
refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
for a list of streaming models.
If you don't want to compile the project by yourself, you can download
pre-compiled
`exe`
from https://github.com/k2-fsa/sherpa-onnx/releases
We use the following model for demonstration.
For instance, you can use the following addresses:
```
bash
cd
$HOME
/open-source/sherpa-onnx/mfc-examples/x64/Release
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx
wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
# now rename
mv encoder-epoch-12-avg-4-chunk-16-left-128.onnx encoder.onnx
mv decoder-epoch-12-avg-4-chunk-16-left-128.onnx decoder.onnx
mv joiner-epoch-12-avg-4-chunk-16-left-128.onnx joiner.onnx
# Now run it!
./StreamingSpeechRecognition.exe
```
-
https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-streaming-v1.5.1.exe
-
https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-non-streaming-v1.5.1.exe
...
...
mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognition.cpp
查看文件 @
0abd7ce
...
...
@@ -3,12 +3,14 @@
// application.
//
// clang-format off
#include "pch.h"
#include "framework.h"
// clang-format on
#include "StreamingSpeechRecognition.h"
#include "StreamingSpeechRecognitionDlg.h"
#include "StreamingSpeechRecognitionDlg.h"
#ifdef _DEBUG
#define new DEBUG_NEW
...
...
mfc-examples/StreamingSpeechRecognition/StreamingSpeechRecognitionDlg.cpp
查看文件 @
0abd7ce
// StreamingSpeechRecognitionDlg.cpp : implementation file
//
// clang-format off
#include "pch.h"
#include "framework.h"
#include "afxdialogex.h"
// clang-format on
#include "StreamingSpeechRecognitionDlg.h"
...
...
@@ -15,7 +16,6 @@
#include "StreamingSpeechRecognition.h"
#ifdef _DEBUG
#define new DEBUG_NEW
#endif
...
...
@@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() {
// exit(-1);
AppendLineToMultilineEditCtrl
(
"No default input device found!"
);
my_btn_
.
EnableWindow
(
FALSE
);
return
;
}
AppendLineToMultilineEditCtrl
(
std
::
string
(
"Selected device "
)
+
Pa_GetDeviceInfo
(
default_device
)
->
name
);
...
...
@@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() {
msg
+=
"
\r\n
"
;
msg
+=
"That's it!
\r\n
"
;
AppendLineToMultilineEditCtrl
(
msg
);
return
;
}
...
...
@@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
// put the selection at the end of text
my_text_
.
SetSel
(
nLength
,
nLength
);
// replace the selection
CString
str
;
str
.
Format
(
_T
(
"%s"
),
s
.
c_str
());
std
::
wstring
wstr
=
Utf8ToUtf16
(
s
);
...
...
mfc-examples/mfc-examples.sln
查看文件 @
0abd7ce
Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.32630.194
# Visual Studio Version 17
VisualStudioVersion = 17.6.33829.357
MinimumVisualStudioVersion = 10.0.40219.1
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|x64 = Debug|x64
...
...
@@ -21,6 +23,14 @@ Global
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32
{A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.ActiveCfg = Debug|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.Build.0 = Debug|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.ActiveCfg = Debug|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.Build.0 = Debug|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.ActiveCfg = Release|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
...
...
sherpa-onnx/c-api/c-api.cc
查看文件 @
0abd7ce
...
...
@@ -27,36 +27,38 @@ struct SherpaOnnxDisplay {
std
::
unique_ptr
<
sherpa_onnx
::
Display
>
impl
;
};
// Expands to `x` if `x` converts to true (non-zero / non-null), otherwise to
// `y`. Every use of an argument is parenthesized so that an argument
// containing a low-precedence operator (e.g. an assignment) is not re-parsed
// against the surrounding ?: operator. Note that `x` may be evaluated twice.
#define SHERPA_ONNX_OR(x, y) ((x) ? (x) : (y))
SherpaOnnxOnlineRecognizer
*
CreateOnlineRecognizer
(
const
SherpaOnnxOnlineRecognizerConfig
*
config
)
{
sherpa_onnx
::
OnlineRecognizerConfig
recognizer_config
;
recognizer_config
.
feat_config
.
sampling_rate
=
config
->
feat_config
.
sample_rate
;
recognizer_config
.
feat_config
.
feature_dim
=
config
->
feat_config
.
feature_dim
;
recognizer_config
.
feat_config
.
sampling_rate
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
sample_rate
,
16000
);
recognizer_config
.
feat_config
.
feature_dim
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
feature_dim
,
80
);
recognizer_config
.
model_config
.
encoder_filename
=
config
->
model_config
.
encoder
;
SHERPA_ONNX_OR
(
config
->
model_config
.
encoder
,
""
)
;
recognizer_config
.
model_config
.
decoder_filename
=
config
->
model_config
.
decoder
;
recognizer_config
.
model_config
.
joiner_filename
=
config
->
model_config
.
joiner
;
recognizer_config
.
model_config
.
tokens
=
config
->
model_config
.
tokens
;
recognizer_config
.
model_config
.
num_threads
=
config
->
model_config
.
num_threads
;
recognizer_config
.
model_config
.
provider
=
config
->
model_config
.
provider
;
recognizer_config
.
model_config
.
debug
=
config
->
model_config
.
debug
;
SHERPA_ONNX_OR
(
config
->
model_config
.
decoder
,
""
);
recognizer_config
.
model_config
.
joiner_filename
=
SHERPA_ONNX_OR
(
config
->
model_config
.
joiner
,
""
);
recognizer_config
.
model_config
.
tokens
=
SHERPA_ONNX_OR
(
config
->
model_config
.
tokens
,
""
);
recognizer_config
.
model_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
recognizer_config
.
model_config
.
provider
=
SHERPA_ONNX_OR
(
config
->
model_config
.
provider
,
"cpu"
);
recognizer_config
.
model_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
decoding_method
=
config
->
decoding_method
;
recognizer_config
.
max_active_paths
=
config
->
max_active_paths
;
recognizer_config
.
decoding_method
=
SHERPA_ONNX_OR
(
config
->
decoding_method
,
"greedy_search"
);
recognizer_config
.
max_active_paths
=
SHERPA_ONNX_OR
(
config
->
max_active_paths
,
4
);
recognizer_config
.
enable_endpoint
=
config
->
enable_endpoint
;
recognizer_config
.
enable_endpoint
=
SHERPA_ONNX_OR
(
config
->
enable_endpoint
,
0
)
;
recognizer_config
.
endpoint_config
.
rule1
.
min_trailing_silence
=
config
->
rule1_min_trailing_silence
;
SHERPA_ONNX_OR
(
config
->
rule1_min_trailing_silence
,
2.4
)
;
recognizer_config
.
endpoint_config
.
rule2
.
min_trailing_silence
=
config
->
rule2_min_trailing_silence
;
SHERPA_ONNX_OR
(
config
->
rule2_min_trailing_silence
,
1.2
)
;
recognizer_config
.
endpoint_config
.
rule3
.
min_utterance_length
=
config
->
rule3_min_utterance_length
;
SHERPA_ONNX_OR
(
config
->
rule3_min_utterance_length
,
20
)
;
if
(
config
->
model_config
.
debug
)
{
fprintf
(
stderr
,
"%s
\n
"
,
recognizer_config
.
ToString
().
c_str
());
...
...
@@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
const
SherpaOnnxOfflineRecognizerConfig
*
config
)
{
sherpa_onnx
::
OfflineRecognizerConfig
recognizer_config
;
recognizer_config
.
feat_config
.
sampling_rate
=
config
->
feat_config
.
sample_rate
;
recognizer_config
.
feat_config
.
sampling_rate
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
sample_rate
,
16000
)
;
recognizer_config
.
feat_config
.
feature_dim
=
config
->
feat_config
.
feature_dim
;
recognizer_config
.
feat_config
.
feature_dim
=
SHERPA_ONNX_OR
(
config
->
feat_config
.
feature_dim
,
80
)
;
recognizer_config
.
model_config
.
transducer
.
encoder_filename
=
config
->
model_config
.
transducer
.
encoder
;
SHERPA_ONNX_OR
(
config
->
model_config
.
transducer
.
encoder
,
""
)
;
recognizer_config
.
model_config
.
transducer
.
decoder_filename
=
config
->
model_config
.
transducer
.
decoder
;
SHERPA_ONNX_OR
(
config
->
model_config
.
transducer
.
decoder
,
""
)
;
recognizer_config
.
model_config
.
transducer
.
joiner_filename
=
config
->
model_config
.
transducer
.
joiner
;
SHERPA_ONNX_OR
(
config
->
model_config
.
transducer
.
joiner
,
""
)
;
recognizer_config
.
model_config
.
paraformer
.
model
=
config
->
model_config
.
paraformer
.
model
;
SHERPA_ONNX_OR
(
config
->
model_config
.
paraformer
.
model
,
""
)
;
recognizer_config
.
model_config
.
nemo_ctc
.
model
=
config
->
model_config
.
nemo_ctc
.
model
;
SHERPA_ONNX_OR
(
config
->
model_config
.
nemo_ctc
.
model
,
""
)
;
recognizer_config
.
model_config
.
tokens
=
config
->
model_config
.
tokens
;
recognizer_config
.
model_config
.
num_threads
=
config
->
model_config
.
num_threads
;
recognizer_config
.
model_config
.
debug
=
config
->
model_config
.
debug
;
recognizer_config
.
model_config
.
tokens
=
SHERPA_ONNX_OR
(
config
->
model_config
.
tokens
,
""
);
recognizer_config
.
model_config
.
num_threads
=
SHERPA_ONNX_OR
(
config
->
model_config
.
num_threads
,
1
);
recognizer_config
.
model_config
.
debug
=
SHERPA_ONNX_OR
(
config
->
model_config
.
debug
,
0
);
recognizer_config
.
lm_config
.
model
=
config
->
lm_config
.
model
;
recognizer_config
.
lm_config
.
scale
=
config
->
lm_config
.
scale
;
recognizer_config
.
lm_config
.
model
=
SHERPA_ONNX_OR
(
config
->
lm_config
.
model
,
""
);
recognizer_config
.
lm_config
.
scale
=
SHERPA_ONNX_OR
(
config
->
lm_config
.
scale
,
1.0
);
recognizer_config
.
decoding_method
=
config
->
decoding_method
;
recognizer_config
.
max_active_paths
=
config
->
max_active_paths
;
recognizer_config
.
decoding_method
=
SHERPA_ONNX_OR
(
config
->
decoding_method
,
"greedy_search"
);
recognizer_config
.
max_active_paths
=
SHERPA_ONNX_OR
(
config
->
max_active_paths
,
4
);
if
(
config
->
model_config
.
debug
)
{
fprintf
(
stderr
,
"%s
\n
"
,
recognizer_config
.
ToString
().
c_str
());
...
...
请
注册
或
登录
后发表评论