Fangjun Kuang
Committed by GitHub

Add non-streaming speech recognition examples for MFC (#212)

@@ -98,6 +98,7 @@ jobs: @@ -98,6 +98,7 @@ jobs:
98 98
99 cd mfc-examples/$arch/Release 99 cd mfc-examples/$arch/Release
100 cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe 100 cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
  101 + cp NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe
101 ls -lh 102 ls -lh
102 103
103 - name: Upload artifact 104 - name: Upload artifact
@@ -106,10 +107,24 @@ jobs: @@ -106,10 +107,24 @@ jobs:
106 name: streaming-speech-recognition-${{ matrix.arch }} 107 name: streaming-speech-recognition-${{ matrix.arch }}
107 path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe 108 path: ./mfc-examples/${{ matrix.arch }}/Release/StreamingSpeechRecognition.exe
108 109
109 - - name: Release pre-compiled binaries and libs for macOS 110 + - name: Upload artifact
  111 + uses: actions/upload-artifact@v2
  112 + with:
  113 + name: non-streaming-speech-recognition-${{ matrix.arch }}
  114 + path: ./mfc-examples/${{ matrix.arch }}/Release/NonStreamingSpeechRecognition.exe
  115 +
  116 + - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
  117 + if: env.RELEASE == 'true'
  118 + uses: svenstaro/upload-release-action@v2
  119 + with:
  120 + file_glob: true
  121 + overwrite: true
  122 + file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-streaming-*.exe
  123 +
  124 + - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
110 if: env.RELEASE == 'true' 125 if: env.RELEASE == 'true'
111 uses: svenstaro/upload-release-action@v2 126 uses: svenstaro/upload-release-action@v2
112 with: 127 with:
113 file_glob: true 128 file_glob: true
114 overwrite: true 129 overwrite: true
115 - file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx*.exe 130 + file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-*.exe
@@ -113,7 +113,7 @@ function(download_onnxruntime) @@ -113,7 +113,7 @@ function(download_onnxruntime)
113 113
114 set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2") 114 set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x86-static-1.15.1.tar.bz2")
115 set(onnxruntime_URL2 "") 115 set(onnxruntime_URL2 "")
116 - set(onnxruntime_HASH "SHA256=a2b33a3e8a1f89cddf303f0a97a5a88f4202579c653cfb29158c8cf7da3734eb") 116 + set(onnxruntime_HASH "SHA256=94d9a30976b5c4a5dff7508d00f141835916e5a36315d5f53be9b3edb85148b5")
117 endif() 117 endif()
118 118
119 if(SHERPA_ONNX_ENABLE_GPU) 119 if(SHERPA_ONNX_ENABLE_GPU)
@@ -161,7 +161,7 @@ function(download_onnxruntime) @@ -161,7 +161,7 @@ function(download_onnxruntime)
161 161
162 set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2") 162 set(onnxruntime_URL "https://huggingface.co/csukuangfj/onnxruntime-libs/resolve/main/onnxruntime-win-x64-static-1.15.1.tar.bz2")
163 set(onnxruntime_URL2 "") 163 set(onnxruntime_URL2 "")
164 - set(onnxruntime_HASH "SHA256=f5c19ac1fc6a61c78a231a41df10aede2586665ab397bdc3f007eb8d2c8d4a19") 164 + set(onnxruntime_HASH "SHA256=c809a8510a89b8b37ae7d563c39229db22bac8fbefcbfe5c81a60b367d065b1b")
165 endif() 165 endif()
166 endif() 166 endif()
167 # After downloading, it contains: 167 # After downloading, it contains:
  1 +
  2 +// NonStreamingSpeechRecognition.cpp : Defines the class behaviors for the
  3 +// application.
  4 +//
  5 +
  6 +// clang-format off
  7 +#include "pch.h"
  8 +#include "framework.h"
  9 +#include "NonStreamingSpeechRecognitionDlg.h"
  10 +#include "NonStreamingSpeechRecognition.h"
  11 +// clang-format on
  12 +
  13 +#ifdef _DEBUG
  14 +#define new DEBUG_NEW
  15 +#endif
  16 +
  17 +// CNonStreamingSpeechRecognitionApp
  18 +
  19 +BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionApp, CWinApp)
  20 +ON_COMMAND(ID_HELP, &CWinApp::OnHelp)
  21 +END_MESSAGE_MAP()
  22 +
  23 +// CNonStreamingSpeechRecognitionApp construction
  24 +
  25 +CNonStreamingSpeechRecognitionApp::CNonStreamingSpeechRecognitionApp() {
  26 + // TODO: add construction code here,
  27 + // Place all significant initialization in InitInstance
  28 +}
  29 +
  30 +// The one and only CNonStreamingSpeechRecognitionApp object
  31 +
  32 +CNonStreamingSpeechRecognitionApp theApp;
  33 +
  34 +// CNonStreamingSpeechRecognitionApp initialization
  35 +
  36 +BOOL CNonStreamingSpeechRecognitionApp::InitInstance() {
  37 + CWinApp::InitInstance();
  38 +
  39 + // Create the shell manager, in case the dialog contains
  40 + // any shell tree view or shell list view controls.
  41 + CShellManager *pShellManager = new CShellManager;
  42 +
  43 + // Activate "Windows Native" visual manager for enabling themes in MFC
  44 + // controls
  45 + CMFCVisualManager::SetDefaultManager(RUNTIME_CLASS(CMFCVisualManagerWindows));
  46 +
  47 + // Standard initialization
  48 + // If you are not using these features and wish to reduce the size
  49 + // of your final executable, you should remove from the following
  50 + // the specific initialization routines you do not need
  51 + // Change the registry key under which our settings are stored
  52 + // TODO: You should modify this string to be something appropriate
  53 + // such as the name of your company or organization
  54 + SetRegistryKey(_T("Local AppWizard-Generated Applications"));
  55 +
  56 + CNonStreamingSpeechRecognitionDlg dlg;
  57 + m_pMainWnd = &dlg;
  58 + INT_PTR nResponse = dlg.DoModal();
  59 + if (nResponse == IDOK) {
  60 + // TODO: Place code here to handle when the dialog is
  61 + // dismissed with OK
  62 + } else if (nResponse == IDCANCEL) {
  63 + // TODO: Place code here to handle when the dialog is
  64 + // dismissed with Cancel
  65 + } else if (nResponse == -1) {
  66 + TRACE(traceAppMsg, 0,
  67 + "Warning: dialog creation failed, so application is terminating "
  68 + "unexpectedly.\n");
  69 + TRACE(traceAppMsg, 0,
  70 + "Warning: if you are using MFC controls on the dialog, you cannot "
  71 + "#define _AFX_NO_MFC_CONTROLS_IN_DIALOGS.\n");
  72 + }
  73 +
  74 + // Delete the shell manager created above.
  75 + if (pShellManager != nullptr) {
  76 + delete pShellManager;
  77 + }
  78 +
  79 +#if !defined(_AFXDLL) && !defined(_AFX_NO_MFC_CONTROLS_IN_DIALOGS)
  80 + ControlBarCleanUp();
  81 +#endif
  82 +
  83 + // Since the dialog has been closed, return FALSE so that we exit the
  84 + // application, rather than start the application's message pump.
  85 + return FALSE;
  86 +}
  1 +
// NonStreamingSpeechRecognition.h : main header file for the
// NonStreamingSpeechRecognition application
  4 +//
  5 +
  6 +#pragma once
  7 +
  8 +#ifndef __AFXWIN_H__
  9 +#error "include 'pch.h' before including this file for PCH"
  10 +#endif
  11 +
  12 +#include "resource.h" // main symbols
  13 +
  14 +// CNonStreamingSpeechRecognitionApp:
  15 +// See NonStreamingSpeechRecognition.cpp for the implementation of this class
  16 +//
  17 +
  18 +class CNonStreamingSpeechRecognitionApp : public CWinApp {
  19 + public:
  20 + CNonStreamingSpeechRecognitionApp();
  21 +
  22 + // Overrides
  23 + public:
  24 + virtual BOOL InitInstance();
  25 +
  26 + // Implementation
  27 +
  28 + DECLARE_MESSAGE_MAP()
  29 +};
  30 +
  31 +extern CNonStreamingSpeechRecognitionApp theApp;
1 B// Microsoft Visual C++ generated resource script. 1 B// Microsoft Visual C++ generated resource script.
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 + <ItemGroup Label="ProjectConfigurations">
  4 + <ProjectConfiguration Include="Debug|Win32">
  5 + <Configuration>Debug</Configuration>
  6 + <Platform>Win32</Platform>
  7 + </ProjectConfiguration>
  8 + <ProjectConfiguration Include="Release|Win32">
  9 + <Configuration>Release</Configuration>
  10 + <Platform>Win32</Platform>
  11 + </ProjectConfiguration>
  12 + <ProjectConfiguration Include="Debug|x64">
  13 + <Configuration>Debug</Configuration>
  14 + <Platform>x64</Platform>
  15 + </ProjectConfiguration>
  16 + <ProjectConfiguration Include="Release|x64">
  17 + <Configuration>Release</Configuration>
  18 + <Platform>x64</Platform>
  19 + </ProjectConfiguration>
  20 + </ItemGroup>
  21 + <PropertyGroup Label="Globals">
  22 + <VCProjectVersion>17.0</VCProjectVersion>
  23 + <ProjectGuid>{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}</ProjectGuid>
  24 + <Keyword>MFCProj</Keyword>
  25 + <RootNamespace>NonStreamingSpeechRecognition</RootNamespace>
  26 + <WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
  27 + </PropertyGroup>
  28 + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
  29 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
  30 + <ConfigurationType>Application</ConfigurationType>
  31 + <UseDebugLibraries>true</UseDebugLibraries>
  32 + <PlatformToolset>v143</PlatformToolset>
  33 + <CharacterSet>Unicode</CharacterSet>
  34 + <UseOfMfc>Static</UseOfMfc>
  35 + </PropertyGroup>
  36 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
  37 + <ConfigurationType>Application</ConfigurationType>
  38 + <UseDebugLibraries>false</UseDebugLibraries>
  39 + <PlatformToolset>v143</PlatformToolset>
  40 + <WholeProgramOptimization>true</WholeProgramOptimization>
  41 + <CharacterSet>Unicode</CharacterSet>
  42 + <UseOfMfc>Static</UseOfMfc>
  43 + </PropertyGroup>
  44 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
  45 + <ConfigurationType>Application</ConfigurationType>
  46 + <UseDebugLibraries>true</UseDebugLibraries>
  47 + <PlatformToolset>v143</PlatformToolset>
  48 + <CharacterSet>Unicode</CharacterSet>
  49 + <UseOfMfc>Static</UseOfMfc>
  50 + </PropertyGroup>
  51 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
  52 + <ConfigurationType>Application</ConfigurationType>
  53 + <UseDebugLibraries>false</UseDebugLibraries>
  54 + <PlatformToolset>v143</PlatformToolset>
  55 + <WholeProgramOptimization>true</WholeProgramOptimization>
  56 + <CharacterSet>Unicode</CharacterSet>
  57 + <UseOfMfc>Static</UseOfMfc>
  58 + </PropertyGroup>
  59 + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
  60 + <ImportGroup Label="ExtensionSettings">
  61 + </ImportGroup>
  62 + <ImportGroup Label="Shared">
  63 + </ImportGroup>
  64 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
  65 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  66 + <Import Project="sherpa-onnx-deps.props" />
  67 + </ImportGroup>
  68 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
  69 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  70 + <Import Project="sherpa-onnx-deps.props" />
  71 + </ImportGroup>
  72 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
  73 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  74 + <Import Project="sherpa-onnx-deps.props" />
  75 + </ImportGroup>
  76 + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  77 + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
  78 + <Import Project="sherpa-onnx-deps.props" />
  79 + </ImportGroup>
  80 + <PropertyGroup Label="UserMacros" />
  81 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  82 + <LinkIncremental>false</LinkIncremental>
  83 + </PropertyGroup>
  84 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
  85 + <LinkIncremental>true</LinkIncremental>
  86 + </PropertyGroup>
  87 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
  88 + <LinkIncremental>true</LinkIncremental>
  89 + </PropertyGroup>
  90 + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
  91 + <LinkIncremental>false</LinkIncremental>
  92 + </PropertyGroup>
  93 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
  94 + <ClCompile>
  95 + <PrecompiledHeader>Use</PrecompiledHeader>
  96 + <WarningLevel>Level3</WarningLevel>
  97 + <FunctionLevelLinking>true</FunctionLevelLinking>
  98 + <IntrinsicFunctions>true</IntrinsicFunctions>
  99 + <SDLCheck>true</SDLCheck>
  100 + <PreprocessorDefinitions>_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  101 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  102 + </ClCompile>
  103 + <Link>
  104 + <SubSystem>Windows</SubSystem>
  105 + <EnableCOMDATFolding>true</EnableCOMDATFolding>
  106 + <OptimizeReferences>true</OptimizeReferences>
  107 + </Link>
  108 + <Midl>
  109 + <MkTypLibCompatible>false</MkTypLibCompatible>
  110 + <ValidateAllParameters>true</ValidateAllParameters>
  111 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  112 + </Midl>
  113 + <ResourceCompile>
  114 + <Culture>0x0409</Culture>
  115 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  116 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  117 + </ResourceCompile>
  118 + </ItemDefinitionGroup>
  119 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
  120 + <ClCompile>
  121 + <PrecompiledHeader>Use</PrecompiledHeader>
  122 + <WarningLevel>Level3</WarningLevel>
  123 + <SDLCheck>true</SDLCheck>
  124 + <PreprocessorDefinitions>WIN32;_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  125 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  126 + </ClCompile>
  127 + <Link>
  128 + <SubSystem>Windows</SubSystem>
  129 + </Link>
  130 + <Midl>
  131 + <MkTypLibCompatible>false</MkTypLibCompatible>
  132 + <ValidateAllParameters>true</ValidateAllParameters>
  133 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  134 + </Midl>
  135 + <ResourceCompile>
  136 + <Culture>0x0409</Culture>
  137 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  138 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  139 + </ResourceCompile>
  140 + </ItemDefinitionGroup>
  141 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
  142 + <ClCompile>
  143 + <PrecompiledHeader>Use</PrecompiledHeader>
  144 + <WarningLevel>Level3</WarningLevel>
  145 + <SDLCheck>true</SDLCheck>
  146 + <PreprocessorDefinitions>_WINDOWS;_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  147 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  148 + </ClCompile>
  149 + <Link>
  150 + <SubSystem>Windows</SubSystem>
  151 + </Link>
  152 + <Midl>
  153 + <MkTypLibCompatible>false</MkTypLibCompatible>
  154 + <ValidateAllParameters>true</ValidateAllParameters>
  155 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  156 + </Midl>
  157 + <ResourceCompile>
  158 + <Culture>0x0409</Culture>
  159 + <PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  160 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  161 + </ResourceCompile>
  162 + </ItemDefinitionGroup>
  163 + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
  164 + <ClCompile>
  165 + <PrecompiledHeader>Use</PrecompiledHeader>
  166 + <WarningLevel>Level3</WarningLevel>
  167 + <FunctionLevelLinking>true</FunctionLevelLinking>
  168 + <IntrinsicFunctions>true</IntrinsicFunctions>
  169 + <SDLCheck>true</SDLCheck>
  170 + <PreprocessorDefinitions>WIN32;_WINDOWS;NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  171 + <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
  172 + </ClCompile>
  173 + <Link>
  174 + <SubSystem>Windows</SubSystem>
  175 + <EnableCOMDATFolding>true</EnableCOMDATFolding>
  176 + <OptimizeReferences>true</OptimizeReferences>
  177 + </Link>
  178 + <Midl>
  179 + <MkTypLibCompatible>false</MkTypLibCompatible>
  180 + <ValidateAllParameters>true</ValidateAllParameters>
  181 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  182 + </Midl>
  183 + <ResourceCompile>
  184 + <Culture>0x0409</Culture>
  185 + <PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
  186 + <AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  187 + </ResourceCompile>
  188 + </ItemDefinitionGroup>
  189 + <ItemGroup>
  190 + <ClInclude Include="framework.h" />
  191 + <ClInclude Include="NonStreamingSpeechRecognition.h" />
  192 + <ClInclude Include="NonStreamingSpeechRecognitionDlg.h" />
  193 + <ClInclude Include="pch.h" />
  194 + <ClInclude Include="Resource.h" />
  195 + <ClInclude Include="targetver.h" />
  196 + </ItemGroup>
  197 + <ItemGroup>
  198 + <ClCompile Include="NonStreamingSpeechRecognition.cpp" />
  199 + <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp" />
  200 + <ClCompile Include="pch.cpp">
  201 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Create</PrecompiledHeader>
  202 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Create</PrecompiledHeader>
  203 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Create</PrecompiledHeader>
  204 + <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Create</PrecompiledHeader>
  205 + </ClCompile>
  206 + </ItemGroup>
  207 + <ItemGroup>
  208 + <ResourceCompile Include="NonStreamingSpeechRecognition.rc" />
  209 + </ItemGroup>
  210 + <ItemGroup>
  211 + <None Include="res\NonStreamingSpeechRecognition.rc2" />
  212 + </ItemGroup>
  213 + <ItemGroup>
  214 + <Image Include="res\NonStreamingSpeechRecognition.ico" />
  215 + </ItemGroup>
  216 + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  217 + <ImportGroup Label="ExtensionTargets">
  218 + </ImportGroup>
  219 +</Project>
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 + <ItemGroup>
  4 + <Filter Include="Source Files">
  5 + <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
  6 + <Extensions>cpp;c;cc;cxx;c++;cppm;ixx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
  7 + </Filter>
  8 + <Filter Include="Header Files">
  9 + <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
  10 + <Extensions>h;hh;hpp;hxx;h++;hm;inl;inc;ipp;xsd</Extensions>
  11 + </Filter>
  12 + <Filter Include="Resource Files">
  13 + <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
  14 + <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
  15 + </Filter>
  16 + </ItemGroup>
  17 + <ItemGroup>
  18 + <ClInclude Include="NonStreamingSpeechRecognition.h">
  19 + <Filter>Header Files</Filter>
  20 + </ClInclude>
  21 + <ClInclude Include="NonStreamingSpeechRecognitionDlg.h">
  22 + <Filter>Header Files</Filter>
  23 + </ClInclude>
  24 + <ClInclude Include="framework.h">
  25 + <Filter>Header Files</Filter>
  26 + </ClInclude>
  27 + <ClInclude Include="targetver.h">
  28 + <Filter>Header Files</Filter>
  29 + </ClInclude>
  30 + <ClInclude Include="Resource.h">
  31 + <Filter>Header Files</Filter>
  32 + </ClInclude>
  33 + <ClInclude Include="pch.h">
  34 + <Filter>Header Files</Filter>
  35 + </ClInclude>
  36 + </ItemGroup>
  37 + <ItemGroup>
  38 + <ClCompile Include="NonStreamingSpeechRecognition.cpp">
  39 + <Filter>Source Files</Filter>
  40 + </ClCompile>
  41 + <ClCompile Include="NonStreamingSpeechRecognitionDlg.cpp">
  42 + <Filter>Source Files</Filter>
  43 + </ClCompile>
  44 + <ClCompile Include="pch.cpp">
  45 + <Filter>Source Files</Filter>
  46 + </ClCompile>
  47 + </ItemGroup>
  48 + <ItemGroup>
  49 + <ResourceCompile Include="NonStreamingSpeechRecognition.rc">
  50 + <Filter>Resource Files</Filter>
  51 + </ResourceCompile>
  52 + </ItemGroup>
  53 + <ItemGroup>
  54 + <None Include="res\NonStreamingSpeechRecognition.rc2">
  55 + <Filter>Resource Files</Filter>
  56 + </None>
  57 + </ItemGroup>
  58 + <ItemGroup>
  59 + <Image Include="res\NonStreamingSpeechRecognition.ico">
  60 + <Filter>Resource Files</Filter>
  61 + </Image>
  62 + </ItemGroup>
  63 +</Project>
  1 +
  2 +// NonStreamingSpeechRecognitionDlg.cpp : implementation file
  3 +//
  4 +
  5 +// clang-format off
  6 +#include "pch.h"
  7 +#include "framework.h"
  8 +#include "afxdialogex.h"
  9 +#include "NonStreamingSpeechRecognition.h"
  10 +#include "NonStreamingSpeechRecognitionDlg.h"
  11 +// clang-format on
  12 +
  13 +#include <fstream>
  14 +#include <sstream>
  15 +#include <string>
  16 +#include <vector>
  17 +
  18 +#ifdef _DEBUG
  19 +#define new DEBUG_NEW
  20 +#endif
  21 +
  22 +Microphone::Microphone() {
  23 + PaError err = Pa_Initialize();
  24 + if (err != paNoError) {
  25 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  26 + exit(-2);
  27 + }
  28 +}
  29 +
  30 +Microphone::~Microphone() {
  31 + PaError err = Pa_Terminate();
  32 + if (err != paNoError) {
  33 + fprintf(stderr, "portaudio error: %s\n", Pa_GetErrorText(err));
  34 + exit(-2);
  35 + }
  36 +}
  37 +
  38 +// see
  39 +// https://stackoverflow.com/questions/7153935/how-to-convert-utf-8-stdstring-to-utf-16-stdwstring
// Converts a UTF-8 encoded string to a sequence of UTF-16 code units.
//
// Code points above U+FFFF are emitted as a surrogate pair.
//
// @param utf8  The UTF-8 encoded input.
// @return The UTF-16 code units, one per wchar_t element.
// @throws std::logic_error("not a UTF-8 string") if the input contains a
//         stray continuation byte, an invalid lead byte, a truncated or
//         malformed multi-byte sequence, an overlong encoding, an encoded
//         surrogate (U+D800..U+DFFF), or a value above U+10FFFF.
static std::wstring Utf8ToUtf16(const std::string &utf8) {
  std::wstring utf16;
  // A code point never needs more UTF-16 units than it has UTF-8 bytes.
  utf16.reserve(utf8.size());

  size_t i = 0;
  while (i < utf8.size()) {
    const unsigned char lead = static_cast<unsigned char>(utf8[i++]);

    unsigned long code_point = 0;
    size_t num_continuation = 0;
    // Smallest code point that legitimately needs this sequence length;
    // anything below it is an overlong encoding.
    unsigned long min_code_point = 0;

    if (lead <= 0x7F) {
      code_point = lead;
    } else if (lead <= 0xBF) {
      // A continuation byte cannot start a sequence.
      throw std::logic_error("not a UTF-8 string");
    } else if (lead <= 0xDF) {
      code_point = lead & 0x1F;
      num_continuation = 1;
      min_code_point = 0x80;
    } else if (lead <= 0xEF) {
      code_point = lead & 0x0F;
      num_continuation = 2;
      min_code_point = 0x800;
    } else if (lead <= 0xF7) {
      code_point = lead & 0x07;
      num_continuation = 3;
      min_code_point = 0x10000;
    } else {
      throw std::logic_error("not a UTF-8 string");
    }

    for (size_t j = 0; j < num_continuation; ++j) {
      if (i == utf8.size()) throw std::logic_error("not a UTF-8 string");
      const unsigned char next = static_cast<unsigned char>(utf8[i++]);
      if (next < 0x80 || next > 0xBF) {
        throw std::logic_error("not a UTF-8 string");
      }
      code_point = (code_point << 6) + (next & 0x3F);
    }

    if (code_point < min_code_point) {
      throw std::logic_error("not a UTF-8 string");
    }
    if (code_point >= 0xD800 && code_point <= 0xDFFF) {
      throw std::logic_error("not a UTF-8 string");
    }
    if (code_point > 0x10FFFF) throw std::logic_error("not a UTF-8 string");

    if (code_point <= 0xFFFF) {
      utf16 += static_cast<wchar_t>(code_point);
    } else {
      // Split into a high/low surrogate pair.
      code_point -= 0x10000;
      utf16 += static_cast<wchar_t>((code_point >> 10) + 0xD800);
      utf16 += static_cast<wchar_t>((code_point & 0x3FF) + 0xDC00);
    }
  }
  return utf16;
}
  90 +
// Joins recognition results into one string for display.
//
// Each entry is rendered as "<index>: <text>", with a zero-based index, and
// entries are separated by "\r\n". There is no trailing separator.
//
// @param results  The texts to join; may be empty.
// @return The joined string, or an empty string when `results` is empty.
static std::string Cat(const std::vector<std::string> &results) {
  std::ostringstream os;

  // size_t index avoids the signed/unsigned comparison against size().
  for (size_t i = 0; i != results.size(); ++i) {
    if (i != 0) {
      os << "\r\n";
    }
    os << i << ": " << results[i];
  }

  return os.str();
}
  103 +
  104 +// CNonStreamingSpeechRecognitionDlg dialog
  105 +
  106 +CNonStreamingSpeechRecognitionDlg::CNonStreamingSpeechRecognitionDlg(
  107 + CWnd *pParent /*=nullptr*/)
  108 + : CDialogEx(IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG, pParent) {
  109 + m_hIcon = AfxGetApp()->LoadIcon(IDR_MAINFRAME);
  110 +}
  111 +
  112 +CNonStreamingSpeechRecognitionDlg::~CNonStreamingSpeechRecognitionDlg() {
  113 + if (recognizer_) {
  114 + DestroyOfflineRecognizer(recognizer_);
  115 + recognizer_ = nullptr;
  116 + }
  117 +}
  118 +
  119 +void CNonStreamingSpeechRecognitionDlg::DoDataExchange(CDataExchange *pDX) {
  120 + CDialogEx::DoDataExchange(pDX);
  121 + DDX_Control(pDX, IDC_EDIT1, my_text_);
  122 + DDX_Control(pDX, IDOK, my_btn_);
  123 +}
  124 +
  125 +BEGIN_MESSAGE_MAP(CNonStreamingSpeechRecognitionDlg, CDialogEx)
  126 +ON_WM_PAINT()
  127 +ON_WM_QUERYDRAGICON()
  128 +ON_BN_CLICKED(IDOK, &CNonStreamingSpeechRecognitionDlg::OnBnClickedOk)
  129 +END_MESSAGE_MAP()
  130 +
  131 +// CNonStreamingSpeechRecognitionDlg message handlers
  132 +
  133 +BOOL CNonStreamingSpeechRecognitionDlg::OnInitDialog() {
  134 + CDialogEx::OnInitDialog();
  135 +
  136 + // Set the icon for this dialog. The framework does this automatically
  137 + // when the application's main window is not a dialog
  138 + SetIcon(m_hIcon, TRUE); // Set big icon
  139 + SetIcon(m_hIcon, FALSE); // Set small icon
  140 +
  141 + // TODO: Add extra initialization here
  142 + InitMicrophone();
  143 +
  144 + return TRUE; // return TRUE unless you set the focus to a control
  145 +}
  146 +
  147 +// If you add a minimize button to your dialog, you will need the code below
  148 +// to draw the icon. For MFC applications using the document/view model,
  149 +// this is automatically done for you by the framework.
  150 +
  151 +void CNonStreamingSpeechRecognitionDlg::OnPaint() {
  152 + if (IsIconic()) {
  153 + CPaintDC dc(this); // device context for painting
  154 +
  155 + SendMessage(WM_ICONERASEBKGND, reinterpret_cast<WPARAM>(dc.GetSafeHdc()),
  156 + 0);
  157 +
  158 + // Center icon in client rectangle
  159 + int cxIcon = GetSystemMetrics(SM_CXICON);
  160 + int cyIcon = GetSystemMetrics(SM_CYICON);
  161 + CRect rect;
  162 + GetClientRect(&rect);
  163 + int x = (rect.Width() - cxIcon + 1) / 2;
  164 + int y = (rect.Height() - cyIcon + 1) / 2;
  165 +
  166 + // Draw the icon
  167 + dc.DrawIcon(x, y, m_hIcon);
  168 + } else {
  169 + CDialogEx::OnPaint();
  170 + }
  171 +}
  172 +
  173 +// The system calls this function to obtain the cursor to display while the user
  174 +// drags
  175 +// the minimized window.
  176 +HCURSOR CNonStreamingSpeechRecognitionDlg::OnQueryDragIcon() {
  177 + return static_cast<HCURSOR>(m_hIcon);
  178 +}
  179 +
  180 +static int32_t RecordCallback(const void *input_buffer,
  181 + void * /*output_buffer*/,
  182 + unsigned long frames_per_buffer, // NOLINT
  183 + const PaStreamCallbackTimeInfo * /*time_info*/,
  184 + PaStreamCallbackFlags /*status_flags*/,
  185 + void *user_data) {
  186 + auto dlg = reinterpret_cast<CNonStreamingSpeechRecognitionDlg *>(user_data);
  187 + auto begin = reinterpret_cast<const float *>(input_buffer);
  188 + auto end = begin + frames_per_buffer;
  189 + dlg->samples_.insert(dlg->samples_.end(), begin, end);
  190 +
  191 + return dlg->started_ ? paContinue : paComplete;
  192 +}
  193 +
  194 +void CNonStreamingSpeechRecognitionDlg::OnBnClickedOk() {
  195 + if (!recognizer_) {
  196 + AppendLineToMultilineEditCtrl("Creating recognizer...");
  197 + AppendLineToMultilineEditCtrl("It will take several seconds. Please wait");
  198 + InitRecognizer();
  199 + if (!recognizer_) {
  200 + // failed to create the recognizer
  201 + return;
  202 + }
  203 + AppendLineToMultilineEditCtrl("Recognizer created!");
  204 + }
  205 +
  206 + if (!started_) {
  207 + samples_.clear();
  208 + started_ = true;
  209 +
  210 + PaStreamParameters param;
  211 + param.device = Pa_GetDefaultInputDevice();
  212 + const PaDeviceInfo *info = Pa_GetDeviceInfo(param.device);
  213 + param.channelCount = 1;
  214 + param.sampleFormat = paFloat32;
  215 + param.suggestedLatency = info->defaultLowInputLatency;
  216 + param.hostApiSpecificStreamInfo = nullptr;
  217 + float sample_rate = config_.feat_config.sample_rate;
  218 + pa_stream_ = nullptr;
  219 + PaError err =
  220 + Pa_OpenStream(&pa_stream_, &param, nullptr, /* &outputParameters, */
  221 + sample_rate,
  222 + 0, // frames per buffer
  223 + paClipOff, // we won't output out of range samples
  224 + // so don't bother clipping them
  225 + RecordCallback, this);
  226 + if (err != paNoError) {
  227 + AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
  228 + Pa_GetErrorText(err));
  229 + my_btn_.EnableWindow(FALSE);
  230 + return;
  231 + }
  232 +
  233 + err = Pa_StartStream(pa_stream_);
  234 + if (err != paNoError) {
  235 + AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
  236 + Pa_GetErrorText(err));
  237 + my_btn_.EnableWindow(FALSE);
  238 + return;
  239 + }
  240 + AppendLineToMultilineEditCtrl(
  241 + "\r\nStarted! Please speak and click stop.\r\n");
  242 + my_btn_.SetWindowText(_T("Stop"));
  243 +
  244 + } else {
  245 + started_ = false;
  246 +
  247 + Pa_Sleep(200); // sleep for 200ms
  248 + if (pa_stream_) {
  249 + PaError err = Pa_CloseStream(pa_stream_);
  250 + if (err != paNoError) {
  251 + AppendLineToMultilineEditCtrl(std::string("PortAudio error: ") +
  252 + Pa_GetErrorText(err));
  253 + my_btn_.EnableWindow(FALSE);
  254 + return;
  255 + }
  256 + }
  257 + pa_stream_ = nullptr;
  258 +
  259 + SherpaOnnxOfflineStream *stream = CreateOfflineStream(recognizer_);
  260 +
  261 + AcceptWaveformOffline(stream, config_.feat_config.sample_rate,
  262 + samples_.data(), samples_.size());
  263 + DecodeOfflineStream(recognizer_, stream);
  264 + SherpaOnnxOfflineRecognizerResult *r = GetOfflineStreamResult(stream);
  265 + results_.emplace_back(r->text);
  266 +
  267 + auto str = Utf8ToUtf16(Cat(results_).c_str());
  268 + my_text_.SetWindowText(str.c_str());
  269 + my_text_.SetFocus();
  270 + my_text_.SetSel(-1);
  271 +
  272 + DestroyOfflineRecognizerResult(r);
  273 +
  274 + DestroyOfflineStream(stream);
  275 + // AfxMessageBox("Stopped", MB_OK);
  276 + my_btn_.SetWindowText(_T("Start"));
  277 + AppendLineToMultilineEditCtrl("\r\nStopped. Please click start and speak");
  278 + }
  279 +}
  280 +
  281 +void CNonStreamingSpeechRecognitionDlg::InitMicrophone() {
  282 + int default_device = Pa_GetDefaultInputDevice();
  283 + int device_count = Pa_GetDeviceCount();
  284 + if (default_device == paNoDevice) {
  285 + // CString str;
  286 + // str.Format(_T("No default input device found!"));
  287 + // AfxMessageBox(str, MB_OK | MB_ICONSTOP);
  288 + // exit(-1);
  289 + AppendLineToMultilineEditCtrl("No default input device found!");
  290 + my_btn_.EnableWindow(FALSE);
  291 + return;
  292 + }
  293 + AppendLineToMultilineEditCtrl(std::string("Selected device ") +
  294 + Pa_GetDeviceInfo(default_device)->name);
  295 +}
  296 +
  297 +bool CNonStreamingSpeechRecognitionDlg::Exists(const std::string &filename) {
  298 + std::ifstream is(filename);
  299 + return is.good();
  300 +}
  301 +
  302 +void CNonStreamingSpeechRecognitionDlg::ShowInitRecognizerHelpMessage() {
  303 + my_btn_.EnableWindow(FALSE);
  304 + std::string msg =
  305 + "\r\nPlease go to\r\n"
  306 + "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html "
  307 + "\r\n";
  308 + msg += "to download a non-streaming model, i.e., an offline model.\r\n";
  309 + msg +=
  310 + "You need to rename them to encoder.onnx, decoder.onnx, and "
  311 + "joiner.onnx correspoondingly.\r\n\r\n";
  312 + msg += "It supports both transducer models and paraformer models.\r\n\r\n";
  313 + msg +=
  314 + "We give two examples below to show you how to download models\r\n\r\n";
  315 + msg += "(1) Transducer\r\n\r\n";
  316 + msg +=
  317 + "We use "
  318 + "https://huggingface.co/pkufool/"
  319 + "icefall-asr-zipformer-wenetspeech-20230615 below\r\n";
  320 + msg +=
  321 + "wget "
  322 + "https://huggingface.co/pkufool/"
  323 + "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
  324 + "encoder-epoch-12-avg-4.onnx\r\n";
  325 + msg +=
  326 + "wget "
  327 + "https://huggingface.co/pkufool/"
  328 + "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
  329 + "decoder-epoch-12-avg-4.onnx\r\n";
  330 + msg +=
  331 + "wget "
  332 + "https://huggingface.co/pkufool/"
  333 + "icefall-asr-zipformer-wenetspeech-20230615/resolve/main/exp/"
  334 + "joiner-epoch-12-avg-4.onnx\r\n";
  335 + msg += "\r\n Now rename them\r\n";
  336 + msg += "mv encoder-epoch-12-avg-4.onnx encoder.onnx\r\n";
  337 + msg += "mv decoder-epoch-12-avg-4.onnx decoder.onnx\r\n";
  338 + msg += "mv joiner-epoch-12-avg-4.onnx joiner.onnx\r\n\r\n";
  339 + msg += "(2) Paraformer\r\n\r\n";
  340 + msg +=
  341 + "wget "
  342 + "https://huggingface.co/csukuangfj/"
  343 + "sherpa-onnx-paraformer-zh-2023-03-28/resolve/main/model.onnx\r\n";
  344 + msg +=
  345 + "wget "
  346 + "https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28/"
  347 + "resolve/main/tokens.txt\r\n\r\n";
  348 + msg += "\r\n Now rename them\r\n";
  349 + msg += "mv model.onnx paraformer.onnx\r\n";
  350 + msg += "\r\n";
  351 + msg += "That's it!\r\n";
  352 +
  353 + AppendLineToMultilineEditCtrl(msg);
  354 +}
  355 +
  356 +void CNonStreamingSpeechRecognitionDlg::InitParaformer() {
  357 + std::string paraformer = "./paraformer.onnx";
  358 + std::string tokens = "./tokens.txt";
  359 +
  360 + bool is_ok = true;
  361 +
  362 + if (Exists("./paraformer.int8.onnx")) {
  363 + paraformer = "./paraformer.int8.onnx";
  364 + } else if (!Exists(paraformer)) {
  365 + std::string msg = paraformer + " does not exist!";
  366 + AppendLineToMultilineEditCtrl(msg);
  367 + is_ok = false;
  368 + }
  369 +
  370 + if (!Exists(tokens)) {
  371 + std::string msg = tokens + " does not exist!";
  372 + AppendLineToMultilineEditCtrl(msg);
  373 + is_ok = false;
  374 + }
  375 +
  376 + if (!is_ok) {
  377 + ShowInitRecognizerHelpMessage();
  378 + return;
  379 + }
  380 +
  381 + memset(&config_, 0, sizeof(config_));
  382 +
  383 + config_.feat_config.sample_rate = 16000;
  384 + config_.feat_config.feature_dim = 80;
  385 +
  386 + config_.model_config.paraformer.model = paraformer.c_str();
  387 + config_.model_config.tokens = tokens.c_str();
  388 + config_.model_config.num_threads = 1;
  389 + config_.model_config.debug = 1;
  390 +
  391 + config_.decoding_method = "greedy_search";
  392 + config_.max_active_paths = 4;
  393 +
  394 + recognizer_ = CreateOfflineRecognizer(&config_);
  395 +}
  396 +
  397 +void CNonStreamingSpeechRecognitionDlg::InitRecognizer() {
  398 + if (Exists("./paraformer.onnx") || Exists("./paraformer.int8.onnx")) {
  399 + InitParaformer();
  400 + return;
  401 + }
  402 +
  403 + // assume it is transducer
  404 +
  405 + std::string encoder = "./encoder.onnx";
  406 + std::string decoder = "./decoder.onnx";
  407 + std::string joiner = "./joiner.onnx";
  408 + std::string tokens = "./tokens.txt";
  409 +
  410 + bool is_ok = true;
  411 + if (!Exists(encoder)) {
  412 + std::string msg = encoder + " does not exist!";
  413 + AppendLineToMultilineEditCtrl(msg);
  414 + is_ok = false;
  415 + }
  416 +
  417 + if (!Exists(decoder)) {
  418 + std::string msg = decoder + " does not exist!";
  419 + AppendLineToMultilineEditCtrl(msg);
  420 + is_ok = false;
  421 + }
  422 +
  423 + if (!Exists(joiner)) {
  424 + std::string msg = joiner + " does not exist!";
  425 + AppendLineToMultilineEditCtrl(msg);
  426 + is_ok = false;
  427 + }
  428 +
  429 + if (!Exists(tokens)) {
  430 + std::string msg = tokens + " does not exist!";
  431 + AppendLineToMultilineEditCtrl(msg);
  432 + is_ok = false;
  433 + }
  434 +
  435 + if (!is_ok) {
  436 + ShowInitRecognizerHelpMessage();
  437 + return;
  438 + }
  439 + memset(&config_, 0, sizeof(config_));
  440 +
  441 + config_.feat_config.sample_rate = 16000;
  442 + config_.feat_config.feature_dim = 80;
  443 +
  444 + config_.model_config.transducer.encoder = encoder.c_str();
  445 + config_.model_config.transducer.decoder = decoder.c_str();
  446 + config_.model_config.transducer.joiner = joiner.c_str();
  447 + config_.model_config.tokens = tokens.c_str();
  448 + config_.model_config.num_threads = 1;
  449 + config_.model_config.debug = 0;
  450 +
  451 + config_.decoding_method = "greedy_search";
  452 + config_.max_active_paths = 4;
  453 +
  454 + recognizer_ = CreateOfflineRecognizer(&config_);
  455 +}
  456 +
  457 +void CNonStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
  458 + const std::string &s) {
  459 + // get the initial text length
  460 + int nLength = my_text_.GetWindowTextLength();
  461 + // put the selection at the end of text
  462 + my_text_.SetSel(nLength, nLength);
  463 + // replace the selection
  464 +
  465 + std::wstring wstr = Utf8ToUtf16(s);
  466 +
  467 + my_text_.ReplaceSel(wstr.c_str());
  468 +}
  469 +
  470 +void CNonStreamingSpeechRecognitionDlg::AppendLineToMultilineEditCtrl(
  471 + const std::string &s) {
  472 + AppendTextToEditCtrl("\r\n" + s);
  473 +}
  1 +
  2 +// NonStreamingSpeechRecognitionDlg.h : header file
  3 +//
  4 +
  5 +#pragma once
  6 +
  7 +#include <string>
  8 +#include <vector>
  9 +
  10 +#include "portaudio.h"
  11 +#include "sherpa-onnx/c-api/c-api.h"
  12 +
  13 +class Microphone {
  14 + public:
  15 + Microphone();
  16 + ~Microphone();
  17 +};
  18 +
  19 +// CNonStreamingSpeechRecognitionDlg dialog
  20 +class CNonStreamingSpeechRecognitionDlg : public CDialogEx {
  21 + // Construction
  22 + public:
  23 + CNonStreamingSpeechRecognitionDlg(
  24 + CWnd *pParent = nullptr); // standard constructor
  25 + ~CNonStreamingSpeechRecognitionDlg();
  26 +
  27 +// Dialog Data
  28 +#ifdef AFX_DESIGN_TIME
  29 + enum { IDD = IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG };
  30 +#endif
  31 +
  32 + protected:
  33 + virtual void DoDataExchange(CDataExchange *pDX); // DDX/DDV support
  34 +
  35 + // Implementation
  36 + protected:
  37 + HICON m_hIcon;
  38 +
  39 + // Generated message map functions
  40 + virtual BOOL OnInitDialog();
  41 + afx_msg void OnPaint();
  42 + afx_msg HCURSOR OnQueryDragIcon();
  43 + DECLARE_MESSAGE_MAP()
  44 + public:
  45 + afx_msg void OnBnClickedOk();
  46 + int RunThread();
  47 +
  48 + private:
  49 + Microphone mic_;
  50 +
  51 + SherpaOnnxOfflineRecognizer *recognizer_ = nullptr;
  52 + SherpaOnnxOfflineRecognizerConfig config_;
  53 +
  54 + PaStream *pa_stream_ = nullptr;
  55 + CButton my_btn_;
  56 + CEdit my_text_;
  57 + std::vector<std::string> results_;
  58 +
  59 + public:
  60 + bool started_ = false;
  61 + std::vector<float> samples_;
  62 +
  63 + private:
  64 + void AppendTextToEditCtrl(const std::string &s);
  65 + void AppendLineToMultilineEditCtrl(const std::string &s);
  66 + void InitMicrophone();
  67 +
  68 + bool Exists(const std::string &filename);
  69 + void InitRecognizer();
  70 +
  71 + void InitParaformer();
  72 + void ShowInitRecognizerHelpMessage();
  73 +};
  1 +//{{NO_DEPENDENCIES}}
  2 +// Microsoft Visual C++ generated include file.
  3 +// Used by NonStreamingSpeechRecognition.rc
  4 +//
  5 +#define IDD_NONSTREAMINGSPEECHRECOGNITION_DIALOG 102
  6 +#define IDR_MAINFRAME 128
  7 +#define IDC_EDIT1 1000
  8 +
  9 +// Next default values for new objects
  10 +//
  11 +#ifdef APSTUDIO_INVOKED
  12 +#ifndef APSTUDIO_READONLY_SYMBOLS
  13 +#define _APS_NEXT_RESOURCE_VALUE 130
  14 +#define _APS_NEXT_COMMAND_VALUE 32771
  15 +#define _APS_NEXT_CONTROL_VALUE 1001
  16 +#define _APS_NEXT_SYMED_VALUE 101
  17 +#endif
  18 +#endif
  1 +#pragma once
  2 +
  3 +#ifndef VC_EXTRALEAN
  4 +#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers
  5 +#endif
  6 +
  7 +#include "targetver.h"
  8 +
  9 +#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS // some CString constructors will be
  10 + // explicit
  11 +
  12 +// turns off MFC's hiding of some common and often safely ignored warning
  13 +// messages
  14 +#define _AFX_ALL_WARNINGS
  15 +
  16 +#include <afxext.h> // MFC extensions
  17 +#include <afxwin.h> // MFC core and standard components
  18 +
  19 +#ifndef _AFX_NO_OLE_SUPPORT
  20 +#include <afxdtctl.h> // MFC support for Internet Explorer 4 Common Controls
  21 +#endif
  22 +#ifndef _AFX_NO_AFXCMN_SUPPORT
  23 +#include <afxcmn.h> // MFC support for Windows Common Controls
  24 +#endif // _AFX_NO_AFXCMN_SUPPORT
  25 +
  26 +#include <afxcontrolbars.h> // MFC support for ribbons and control bars
  1 +// pch.cpp: source file corresponding to the pre-compiled header
  2 +
  3 +#include "pch.h"
  4 +
  5 +// When you are using pre-compiled headers, this source file is necessary for
  6 +// compilation to succeed.
  1 +// pch.h: This is a precompiled header file.
  2 +// Files listed below are compiled only once, improving build performance for
  3 +// future builds. This also affects IntelliSense performance, including code
  4 +// completion and many code browsing features. However, files listed here are
  5 +// ALL re-compiled if any one of them is updated between builds. Do not add
  6 +// files here that you will be updating frequently as this negates the
  7 +// performance advantage.
  8 +
  9 +#ifndef PCH_H
  10 +#define PCH_H
  11 +
  12 +// add headers that you want to pre-compile here
  13 +#include "framework.h"
  14 +
  15 +#endif // PCH_H
  1 +<?xml version="1.0" encoding="utf-8"?>
  2 +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
  3 + <ImportGroup Label="PropertySheets" />
  4 + <PropertyGroup Label="UserMacros" />
  5 + <PropertyGroup>
  6 + <SherpaOnnxBuildDirectory>..\..\build</SherpaOnnxBuildDirectory>
  7 + <SherpaOnnxInstallDirectory>..\..\build\install</SherpaOnnxInstallDirectory>
  8 + <SherpaOnnxLibraries>
  9 + sherpa-onnx-portaudio_static.lib;
  10 + sherpa-onnx-c-api.lib;
  11 + sherpa-onnx-core.lib;
  12 + kaldi-native-fbank-core.lib;
  13 + absl_base.lib;
  14 + absl_city.lib;
  15 + absl_hash.lib;
  16 + absl_low_level_hash.lib;
  17 + absl_raw_hash_set.lib;
  18 + absl_raw_logging_internal.lib;
  19 + absl_throw_delegate.lib;
  20 + clog.lib;
  21 + cpuinfo.lib;
  22 + flatbuffers.lib;
  23 + libprotobuf-lite.lib;
  24 + onnx.lib;
  25 + onnx_proto.lib;
  26 + onnxruntime_common.lib;
  27 + onnxruntime_flatbuffers.lib;
  28 + onnxruntime_framework.lib;
  29 + onnxruntime_graph.lib;
  30 + onnxruntime_mlas.lib;
  31 + onnxruntime_optimizer.lib;
  32 + onnxruntime_providers.lib;
  33 + onnxruntime_session.lib;
  34 + onnxruntime_util.lib;
  35 + re2.lib;
  36 + </SherpaOnnxLibraries>
  37 + </PropertyGroup>
  38 + <ItemDefinitionGroup>
  39 + <ClCompile>
  40 + <AdditionalIncludeDirectories>
  41 + $(SherpaOnnxBuildDirectory)\_deps\portaudio-src\include;
  42 + $(SherpaOnnxInstallDirectory)\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
  43 + </ClCompile>
  44 + <Link>
  45 + <AdditionalLibraryDirectories>$(SherpaOnnxInstallDirectory)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
  46 + <AdditionalDependencies>$(SherpaOnnxLibraries);</AdditionalDependencies>
  47 + </Link>
  48 + </ItemDefinitionGroup>
  49 + <ItemGroup />
  50 +</Project>
  1 +#pragma once
  2 +
  3 +// Including SDKDDKVer.h defines the highest available Windows platform.
  4 +
  5 +// If you wish to build your application for a previous Windows platform,
  6 +// include WinSDKVer.h and set the _WIN32_WINNT macro to the platform you wish
  7 +// to support before including SDKDDKVer.h.
  8 +
  9 +#include <SDKDDKVer.h>
@@ -3,11 +3,19 @@ @@ -3,11 +3,19 @@
3 This directory contains examples showing how to use Next-gen Kaldi in MFC 3 This directory contains examples showing how to use Next-gen Kaldi in MFC
4 for speech recognition. 4 for speech recognition.
5 5
6 -Caution: You need to use Windows and install Visual Studio in order to run it. 6 +Caution: You need to use Windows and install Visual Studio 2022 in order to
  7 +compile it.
  8 +
  9 +Hint: If you don't want to install Visual Studio, see below for how to
  10 +download a pre-compiled `exe`.
  11 +
7 We use bash script below to demonstrate how to use it. Please change 12 We use bash script below to demonstrate how to use it. Please change
8 the commands accordingly for Windows. 13 the commands accordingly for Windows.
9 14
10 -## Streaming speech recognition 15 +## How to compile
  16 +
  17 +
  18 +First, we need to compile sherpa-onnx:
11 19
12 ```bash 20 ```bash
13 mkdir -p $HOME/open-source 21 mkdir -p $HOME/open-source
@@ -19,7 +27,6 @@ mkdir build @@ -19,7 +27,6 @@ mkdir build
19 27
20 cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install .. 28 cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=./install ..
21 cmake --build . --config Release --target install 29 cmake --build . --config Release --target install
22 -  
23 cd ../mfc-examples 30 cd ../mfc-examples
24 31
25 msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64 32 msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x64
@@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6 @@ -27,26 +34,13 @@ msbuild ./mfc-examples.sln /property:Configuration=Release /property:Platform=x6
27 # now run the program 34 # now run the program
28 35
29 ./x64/Release/StreamingSpeechRecognition.exe 36 ./x64/Release/StreamingSpeechRecognition.exe
  37 +./x64/Release/NonStreamingSpeechRecognition.exe
30 ``` 38 ```
31 39
32 -Note that we also need to download pre-trained models. Please  
33 -refer to https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html  
34 -for a list of streaming models. 40 +If you don't want to compile the project by yourself, you can download
  41 +pre-compiled `exe` from https://github.com/k2-fsa/sherpa-onnx/releases
35 42
36 -We use the following model for demonstration. 43 +For instance, you can use the following addresses:
37 44
38 -```bash  
39 -cd $HOME/open-source/sherpa-onnx/mfc-examples/x64/Release  
40 -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx  
41 -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx  
42 -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx  
43 -wget https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt  
44 -  
45 -# now rename  
46 -mv encoder-epoch-12-avg-4-chunk-16-left-128.onnx encoder.onnx  
47 -mv decoder-epoch-12-avg-4-chunk-16-left-128.onnx decoder.onnx  
48 -mv joiner-epoch-12-avg-4-chunk-16-left-128.onnx joiner.onnx  
49 -  
50 -# Now run it!  
51 -./StreamingSpeechRecognition.exe  
52 -``` 45 + - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-streaming-v1.5.1.exe
  46 + - https://github.com/k2-fsa/sherpa-onnx/releases/download/v1.5.1/sherpa-onnx-non-streaming-v1.5.1.exe
@@ -3,12 +3,14 @@ @@ -3,12 +3,14 @@
3 // application. 3 // application.
4 // 4 //
5 5
  6 +// clang-format off
6 #include "pch.h" 7 #include "pch.h"
7 #include "framework.h" 8 #include "framework.h"
  9 +// clang-format on
8 10
9 #include "StreamingSpeechRecognition.h" 11 #include "StreamingSpeechRecognition.h"
10 -#include "StreamingSpeechRecognitionDlg.h"  
11 12
  13 +#include "StreamingSpeechRecognitionDlg.h"
12 14
13 #ifdef _DEBUG 15 #ifdef _DEBUG
14 #define new DEBUG_NEW 16 #define new DEBUG_NEW
1 1
2 // StreamingSpeechRecognitionDlg.cpp : implementation file 2 // StreamingSpeechRecognitionDlg.cpp : implementation file
3 // 3 //
  4 +// clang-format off
4 #include "pch.h" 5 #include "pch.h"
5 #include "framework.h" 6 #include "framework.h"
6 #include "afxdialogex.h" 7 #include "afxdialogex.h"
7 - 8 +// clang-format on
8 9
9 #include "StreamingSpeechRecognitionDlg.h" 10 #include "StreamingSpeechRecognitionDlg.h"
10 11
@@ -15,7 +16,6 @@ @@ -15,7 +16,6 @@
15 16
16 #include "StreamingSpeechRecognition.h" 17 #include "StreamingSpeechRecognition.h"
17 18
18 -  
19 #ifdef _DEBUG 19 #ifdef _DEBUG
20 #define new DEBUG_NEW 20 #define new DEBUG_NEW
21 #endif 21 #endif
@@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() { @@ -223,6 +223,7 @@ void CStreamingSpeechRecognitionDlg::InitMicrophone() {
223 // exit(-1); 223 // exit(-1);
224 AppendLineToMultilineEditCtrl("No default input device found!"); 224 AppendLineToMultilineEditCtrl("No default input device found!");
225 my_btn_.EnableWindow(FALSE); 225 my_btn_.EnableWindow(FALSE);
  226 + return;
226 } 227 }
227 AppendLineToMultilineEditCtrl(std::string("Selected device ") + 228 AppendLineToMultilineEditCtrl(std::string("Selected device ") +
228 Pa_GetDeviceInfo(default_device)->name); 229 Pa_GetDeviceInfo(default_device)->name);
@@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() { @@ -309,7 +310,6 @@ void CStreamingSpeechRecognitionDlg::InitRecognizer() {
309 msg += "\r\n"; 310 msg += "\r\n";
310 msg += "That's it!\r\n"; 311 msg += "That's it!\r\n";
311 312
312 -  
313 AppendLineToMultilineEditCtrl(msg); 313 AppendLineToMultilineEditCtrl(msg);
314 return; 314 return;
315 } 315 }
@@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl( @@ -398,8 +398,6 @@ void CStreamingSpeechRecognitionDlg::AppendTextToEditCtrl(
398 // put the selection at the end of text 398 // put the selection at the end of text
399 my_text_.SetSel(nLength, nLength); 399 my_text_.SetSel(nLength, nLength);
400 // replace the selection 400 // replace the selection
401 - CString str;  
402 - str.Format(_T("%s"), s.c_str());  
403 401
404 std::wstring wstr = Utf8ToUtf16(s); 402 std::wstring wstr = Utf8ToUtf16(s);
405 403
1  1 
2 Microsoft Visual Studio Solution File, Format Version 12.00 2 Microsoft Visual Studio Solution File, Format Version 12.00
3 -# Visual Studio Version 16  
4 -VisualStudioVersion = 16.0.32630.194 3 +# Visual Studio Version 17
  4 +VisualStudioVersion = 17.6.33829.357
5 MinimumVisualStudioVersion = 10.0.40219.1 5 MinimumVisualStudioVersion = 10.0.40219.1
6 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}" 6 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "StreamingSpeechRecognition", "StreamingSpeechRecognition\StreamingSpeechRecognition.vcxproj", "{A79C2604-C33D-497C-9770-D34E118B77FE}"
7 EndProject 7 EndProject
  8 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "NonStreamingSpeechRecognition", "NonStreamingSpeechRecognition\NonStreamingSpeechRecognition.vcxproj", "{0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}"
  9 +EndProject
8 Global 10 Global
9 GlobalSection(SolutionConfigurationPlatforms) = preSolution 11 GlobalSection(SolutionConfigurationPlatforms) = preSolution
10 Debug|x64 = Debug|x64 12 Debug|x64 = Debug|x64
@@ -21,6 +23,14 @@ Global @@ -21,6 +23,14 @@ Global
21 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64 23 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x64.Build.0 = Release|x64
22 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32 24 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.ActiveCfg = Release|Win32
23 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32 25 {A79C2604-C33D-497C-9770-D34E118B77FE}.Release|x86.Build.0 = Release|Win32
  26 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.ActiveCfg = Debug|x64
  27 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x64.Build.0 = Debug|x64
  28 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.ActiveCfg = Debug|Win32
  29 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Debug|x86.Build.0 = Debug|Win32
  30 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.ActiveCfg = Release|x64
  31 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x64.Build.0 = Release|x64
  32 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.ActiveCfg = Release|Win32
  33 + {0298EE00-7AF2-4A66-9D5F-AA0D92AC871D}.Release|x86.Build.0 = Release|Win32
24 EndGlobalSection 34 EndGlobalSection
25 GlobalSection(SolutionProperties) = preSolution 35 GlobalSection(SolutionProperties) = preSolution
26 HideSolutionNode = FALSE 36 HideSolutionNode = FALSE
@@ -27,36 +27,38 @@ struct SherpaOnnxDisplay { @@ -27,36 +27,38 @@ struct SherpaOnnxDisplay {
27 std::unique_ptr<sherpa_onnx::Display> impl; 27 std::unique_ptr<sherpa_onnx::Display> impl;
28 }; 28 };
29 29
  30 +#define SHERPA_ONNX_OR(x, y) (x ? x : y)
  31 +
30 SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( 32 SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
31 const SherpaOnnxOnlineRecognizerConfig *config) { 33 const SherpaOnnxOnlineRecognizerConfig *config) {
32 sherpa_onnx::OnlineRecognizerConfig recognizer_config; 34 sherpa_onnx::OnlineRecognizerConfig recognizer_config;
33 35
34 - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate;  
35 - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; 36 + recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
  37 + recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
36 38
37 recognizer_config.model_config.encoder_filename = 39 recognizer_config.model_config.encoder_filename =
38 - config->model_config.encoder; 40 + SHERPA_ONNX_OR(config->model_config.encoder, "");
39 recognizer_config.model_config.decoder_filename = 41 recognizer_config.model_config.decoder_filename =
40 - config->model_config.decoder;  
41 - recognizer_config.model_config.joiner_filename = config->model_config.joiner;  
42 - recognizer_config.model_config.tokens = config->model_config.tokens;  
43 - recognizer_config.model_config.num_threads = config->model_config.num_threads;  
44 - recognizer_config.model_config.provider = config->model_config.provider;  
45 - recognizer_config.model_config.debug = config->model_config.debug; 42 + SHERPA_ONNX_OR(config->model_config.decoder, "");
  43 + recognizer_config.model_config.joiner_filename = SHERPA_ONNX_OR(config->model_config.joiner, "");
  44 + recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
  45 + recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
  46 + recognizer_config.model_config.provider = SHERPA_ONNX_OR(config->model_config.provider, "cpu");
  47 + recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
46 48
47 - recognizer_config.decoding_method = config->decoding_method;  
48 - recognizer_config.max_active_paths = config->max_active_paths; 49 + recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
  50 + recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
49 51
50 - recognizer_config.enable_endpoint = config->enable_endpoint; 52 + recognizer_config.enable_endpoint = SHERPA_ONNX_OR(config->enable_endpoint, 0);
51 53
52 recognizer_config.endpoint_config.rule1.min_trailing_silence = 54 recognizer_config.endpoint_config.rule1.min_trailing_silence =
53 - config->rule1_min_trailing_silence; 55 + SHERPA_ONNX_OR(config->rule1_min_trailing_silence, 2.4);
54 56
55 recognizer_config.endpoint_config.rule2.min_trailing_silence = 57 recognizer_config.endpoint_config.rule2.min_trailing_silence =
56 - config->rule2_min_trailing_silence; 58 + SHERPA_ONNX_OR(config->rule2_min_trailing_silence, 1.2);
57 59
58 recognizer_config.endpoint_config.rule3.min_utterance_length = 60 recognizer_config.endpoint_config.rule3.min_utterance_length =
59 - config->rule3_min_utterance_length; 61 + SHERPA_ONNX_OR(config->rule3_min_utterance_length, 20);
60 62
61 if (config->model_config.debug) { 63 if (config->model_config.debug) {
62 fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); 64 fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());
@@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( @@ -171,34 +173,34 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
171 const SherpaOnnxOfflineRecognizerConfig *config) { 173 const SherpaOnnxOfflineRecognizerConfig *config) {
172 sherpa_onnx::OfflineRecognizerConfig recognizer_config; 174 sherpa_onnx::OfflineRecognizerConfig recognizer_config;
173 175
174 - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; 176 + recognizer_config.feat_config.sampling_rate = SHERPA_ONNX_OR(config->feat_config.sample_rate, 16000);
175 177
176 - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; 178 + recognizer_config.feat_config.feature_dim = SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
177 179
178 recognizer_config.model_config.transducer.encoder_filename = 180 recognizer_config.model_config.transducer.encoder_filename =
179 - config->model_config.transducer.encoder; 181 + SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
180 182
181 recognizer_config.model_config.transducer.decoder_filename = 183 recognizer_config.model_config.transducer.decoder_filename =
182 - config->model_config.transducer.decoder; 184 + SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
183 185
184 recognizer_config.model_config.transducer.joiner_filename = 186 recognizer_config.model_config.transducer.joiner_filename =
185 - config->model_config.transducer.joiner; 187 + SHERPA_ONNX_OR(config->model_config.transducer.joiner,"");
186 188
187 recognizer_config.model_config.paraformer.model = 189 recognizer_config.model_config.paraformer.model =
188 - config->model_config.paraformer.model; 190 + SHERPA_ONNX_OR(config->model_config.paraformer.model, "");
189 191
190 recognizer_config.model_config.nemo_ctc.model = 192 recognizer_config.model_config.nemo_ctc.model =
191 - config->model_config.nemo_ctc.model; 193 + SHERPA_ONNX_OR(config->model_config.nemo_ctc.model, "");
192 194
193 - recognizer_config.model_config.tokens = config->model_config.tokens;  
194 - recognizer_config.model_config.num_threads = config->model_config.num_threads;  
195 - recognizer_config.model_config.debug = config->model_config.debug; 195 + recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, "");
  196 + recognizer_config.model_config.num_threads = SHERPA_ONNX_OR(config->model_config.num_threads, 1);
  197 + recognizer_config.model_config.debug = SHERPA_ONNX_OR(config->model_config.debug, 0);
196 198
197 - recognizer_config.lm_config.model = config->lm_config.model;  
198 - recognizer_config.lm_config.scale = config->lm_config.scale; 199 + recognizer_config.lm_config.model = SHERPA_ONNX_OR(config->lm_config.model, "");
  200 + recognizer_config.lm_config.scale = SHERPA_ONNX_OR(config->lm_config.scale, 1.0);
199 201
200 - recognizer_config.decoding_method = config->decoding_method;  
201 - recognizer_config.max_active_paths = config->max_active_paths; 202 + recognizer_config.decoding_method = SHERPA_ONNX_OR(config->decoding_method, "greedy_search");
  203 + recognizer_config.max_active_paths = SHERPA_ONNX_OR(config->max_active_paths, 4);
202 204
203 if (config->model_config.debug) { 205 if (config->model_config.debug) {
204 fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); 206 fprintf(stderr, "%s\n", recognizer_config.ToString().c_str());