Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Fangjun Kuang
2024-03-25 18:45:09 +0800
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2024-03-25 18:45:09 +0800
Commit
305c373107062a00ef25200abdd099c08e542913
305c3731
1 parent
83a10a55
Add C# API for spoken language identification (#697)
隐藏空白字符变更
内嵌
并排对比
正在显示
10 个修改的文件
包含
265 行增加
和
55 行删除
.github/scripts/test-dot-net.sh
.github/workflows/test-dot-net-nuget.yaml
.github/workflows/test-dot-net.yaml
dotnet-examples/sherpa-onnx.sln
dotnet-examples/spoken-language-identification/Program.cs
dotnet-examples/spoken-language-identification/WaveReader.cs
dotnet-examples/spoken-language-identification/run.sh
dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
scripts/dotnet/examples/spoken-language-identification.csproj
scripts/dotnet/offline.cs
.github/scripts/test-dot-net.sh
0 → 100755
查看文件 @
305c373
#!/usr/bin/env bash
#
# Runs every .NET example script in turn and then collects the wave files
# produced by the offline-tts examples into ./tts.
#
# The relative paths below mean the script has to be started from the
# repository root.

# Abort on the first failing example (and echo each command). Without this
# the exit status of the script is only that of the final `cp`, so a broken
# example would not fail the CI step that invokes this script.
set -ex

cd dotnet-examples/

cd spoken-language-identification
./run.sh

cd ../online-decode-files
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh

cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh

cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh

cd ../..

# -p: do not fail (now that -e is active) when the directory already exists.
mkdir -p tts

cp dotnet-examples/offline-tts/*.wav ./tts
...
...
.github/workflows/test-dot-net-nuget.yaml
查看文件 @
305c373
...
...
@@ -40,33 +40,10 @@ jobs:
-
name
:
Check dotnet
run
:
dotnet --info
-
name
:
Decode a file
-
name
:
Run tests
shell
:
bash
run
:
|
cd dotnet-examples/
cd online-decode-files
./run-transducer.sh
./run-paraformer.sh
cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh
cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh
cd ../..
mkdir tts
cp dotnet-examples/offline-tts/*.wav ./tts
.github/scripts/test-dot-net.sh
-
uses
:
actions/upload-artifact@v4
with
:
...
...
.github/workflows/test-dot-net.yaml
查看文件 @
305c373
...
...
@@ -177,39 +177,16 @@ jobs:
cp -v scripts/dotnet/examples/offline-decode-files.csproj dotnet-examples/offline-decode-files/
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
ls -lh /tmp
-
name
:
Decode a file
-
name
:
Run tests
shell
:
bash
run
:
|
cd dotnet-examples/
.github/scripts/test-dot-net.sh
cd online-decode-files
./run-zipformer2-ctc.sh
./run-transducer.sh
./run-paraformer.sh
cd ../offline-decode-files
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-hotwords.sh
./run-whisper.sh
./run-tdnn-yesno.sh
cd ../offline-tts
./run-aishell3.sh
./run-piper.sh
ls -lh
cd ../..
mkdir tts
cp dotnet-examples/offline-tts/*.wav ./tts
-
uses
:
actions/upload-artifact@v3
-
uses
:
actions/upload-artifact@v4
with
:
name
:
dot-net-tts-generated-test-files-${{ matrix.os }}
path
:
tts
...
...
dotnet-examples/sherpa-onnx.sln
查看文件 @
305c373
...
...
@@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts", "offline-tts\
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline-tts-play\offline-tts-play.csproj", "{40781464-5948-462B-BA4B-98932711513F}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
...
...
@@ -42,5 +44,9 @@ Global
{40781464-5948-462B-BA4B-98932711513F}.Debug|Any CPU.Build.0 = Debug|Any CPU
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.ActiveCfg = Release|Any CPU
{40781464-5948-462B-BA4B-98932711513F}.Release|Any CPU.Build.0 = Release|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
...
...
dotnet-examples/spoken-language-identification/Program.cs
0 → 100644
查看文件 @
305c373
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do spoken language identification with whisper.
//
// 1. Download a whisper multilingual model. We use a tiny model below.
// Please refer to https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
// to download more models.
//
// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
// tar xvf sherpa-onnx-whisper-tiny.tar.bz2
// rm sherpa-onnx-whisper-tiny.tar.bz2
//
// 2. Now run it
//
// dotnet run
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class SpokenLanguageIdentificationDemo
{
    /// <summary>
    /// Identifies the language spoken in a bundled test wave file using the
    /// whisper tiny model and prints the file name and the detected language.
    /// </summary>
    /// <param name="args">Command line arguments; not used.</param>
    static void Main(string[] args)
    {
        var config = new SpokenLanguageIdentificationConfig();
        config.Whisper.Encoder = "./sherpa-onnx-whisper-tiny/tiny-encoder.int8.onnx";
        config.Whisper.Decoder = "./sherpa-onnx-whisper-tiny/tiny-decoder.int8.onnx";

        // SpokenLanguageIdentification implements IDisposable and wraps a
        // native handle, so release it deterministically when Main returns
        // instead of leaving it to the finalizer.
        using var slid = new SpokenLanguageIdentification(config);

        var filename = "./sherpa-onnx-whisper-tiny/test_wavs/0.wav";
        WaveReader waveReader = new WaveReader(filename);

        // Feed the whole file into a single stream, then run identification.
        var s = slid.CreateStream();
        s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

        var result = slid.Compute(s);

        Console.WriteLine($"Filename: {filename}");
        Console.WriteLine($"Detected language: {result.Lang}");
    }
}
...
...
dotnet-examples/spoken-language-identification/WaveReader.cs
0 → 120000
查看文件 @
305c373
../
offline
-
decode
-
files
/
WaveReader
.
cs
\ No newline at end of file
...
...
dotnet-examples/spoken-language-identification/run.sh
0 → 100755
查看文件 @
305c373
#!/usr/bin/env bash
#
# Runs the spoken language identification demo. The whisper tiny model is
# downloaded and unpacked first, unless its directory is already present.

set -ex

if ! test -d ./sherpa-onnx-whisper-tiny; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2
  tar xvf sherpa-onnx-whisper-tiny.tar.bz2
  # The archive is no longer needed once it has been extracted.
  rm sherpa-onnx-whisper-tiny.tar.bz2
fi

dotnet run
...
...
dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
0 → 100644
查看文件 @
305c373
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>spoken_language_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
...
...
scripts/dotnet/examples/spoken-language-identification.csproj
0 → 100644
查看文件 @
305c373
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<RootNamespace>spoken_language_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<PropertyGroup>
<RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
</ItemGroup>
</Project>
...
...
scripts/dotnet/offline.cs
查看文件 @
305c373
...
...
@@ -403,8 +403,8 @@ namespace SherpaOnnx
while
(*
buffer
!=
0
)
{
++
buffer
;
length
+=
1
;
}
length
=
(
int
)(
buffer
-
(
byte
*)
impl
.
Text
);
}
byte
[]
stringBuffer
=
new
byte
[
length
];
...
...
@@ -496,8 +496,6 @@ namespace SherpaOnnx
return
new
OfflineStream
(
p
);
}
/// You have to ensure that IsReady(stream) returns true before
/// you call this method
public
void
Decode
(
OfflineStream
stream
)
{
Decode
(
_handle
.
Handle
,
stream
.
Handle
);
...
...
@@ -549,4 +547,137 @@ namespace SherpaOnnx
private
static
extern
void
Decode
(
IntPtr
handle
,
IntPtr
[]
streams
,
int
n
);
}
/// <summary>
/// Whisper model settings used for spoken language identification.
/// The struct uses sequential layout and its string fields are marshaled as
/// LPStr. It is embedded in SpokenLanguageIdentificationConfig, which is
/// passed by reference to the native create function.
/// NOTE(review): field order presumably has to mirror the native struct -
/// confirm against the C API header before reordering anything.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationWhisperConfig
{
    /// <summary>
    /// Sets Encoder and Decoder to the empty string and TailPaddings to -1.
    /// </summary>
    public SpokenLanguageIdentificationWhisperConfig()
    {
        Encoder = "";
        Decoder = "";
        TailPaddings = -1;
    }

    /// <summary>
    /// Encoder model; the demo program assigns the path of an .onnx file.
    /// </summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Encoder;

    /// <summary>
    /// Decoder model; the demo program assigns the path of an .onnx file.
    /// </summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Decoder;

    /// <summary>
    /// Defaults to -1.
    /// NOTE(review): -1 presumably lets the native side pick its own
    /// default - confirm against the native implementation.
    /// </summary>
    public int TailPaddings;
}
/// <summary>
/// Top-level configuration handed (by reference) to the native function
/// that creates a spoken language identification instance.
/// </summary>
/// <remarks>
/// The sequential layout is spelled out explicitly, as it is on the nested
/// whisper config struct, because this struct crosses the P/Invoke boundary.
/// Keep the field order unchanged.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct SpokenLanguageIdentificationConfig
{
    /// <summary>
    /// Creates a configuration with a default whisper config, one thread,
    /// debug switched off (0) and the "cpu" provider.
    /// </summary>
    public SpokenLanguageIdentificationConfig()
    {
        Whisper = new SpokenLanguageIdentificationWhisperConfig();
        NumThreads = 1;
        Debug = 0;
        Provider = "cpu";
    }

    /// <summary>Whisper model settings.</summary>
    public SpokenLanguageIdentificationWhisperConfig Whisper;

    /// <summary>Defaults to 1.</summary>
    public int NumThreads;

    /// <summary>Defaults to 0.</summary>
    public int Debug;

    /// <summary>Marshaled as LPStr; defaults to "cpu".</summary>
    [MarshalAs(UnmanagedType.LPStr)]
    public string Provider;
}
/// <summary>
/// Managed copy of a native spoken language identification result.
/// The language string is copied out of native memory in the constructor,
/// so the instance stays valid after the native result has been destroyed.
/// </summary>
public class SpokenLanguageIdentificationResult
{
    /// <summary>
    /// Reads the native result that <paramref name="handle"/> points to.
    /// </summary>
    /// <param name="handle">
    /// Pointer to a native struct whose first member is a pointer to a
    /// NUL-terminated UTF-8 string. A zero handle, or a zero string pointer
    /// inside the struct, yields an empty <see cref="Lang"/>.
    /// </param>
    public SpokenLanguageIdentificationResult(IntPtr handle)
    {
        _lang = "";

        if (handle == IntPtr.Zero)
        {
            return;
        }

        Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

        // Dereferencing a null pointer in the unsafe block below would take
        // the whole process down, so bail out first.
        if (impl.Lang == IntPtr.Zero)
        {
            return;
        }

        // PtrToStringUTF8() requires .net standard 2.1
        // _lang = Marshal.PtrToStringUTF8(impl.Lang);

        // Find the length of the NUL-terminated string by hand.
        int length = 0;

        unsafe
        {
            byte* buffer = (byte*)impl.Lang;
            while (*buffer != 0)
            {
                ++buffer;
                length += 1;
            }
        }

        byte[] stringBuffer = new byte[length];
        Marshal.Copy(impl.Lang, stringBuffer, 0, length);
        _lang = Encoding.UTF8.GetString(stringBuffer);
    }

    // Mirrors the part of the native result struct that is read here.
    [StructLayout(LayoutKind.Sequential)]
    struct Impl
    {
        public IntPtr Lang;
    }

    private String _lang;

    /// <summary>The language string copied from the native result.</summary>
    public String Lang => _lang;
}
/// <summary>
/// Managed wrapper around a native spoken language identification object.
/// The native object is created in the constructor and destroyed by
/// <see cref="Dispose"/> or, failing that, by the finalizer.
/// </summary>
public class SpokenLanguageIdentification : IDisposable
{
    /// <summary>Creates the native object from <paramref name="config"/>.</summary>
    /// <param name="config">Configuration passed by reference to native code.</param>
    public SpokenLanguageIdentification(SpokenLanguageIdentificationConfig config)
    {
        IntPtr h = SherpaOnnxCreateSpokenLanguageIdentification(ref config);
        _handle = new HandleRef(this, h);
    }

    /// <summary>Creates a new offline stream from this instance's native handle.</summary>
    /// <returns>An <c>OfflineStream</c> wrapping the native stream pointer.</returns>
    public OfflineStream CreateStream()
    {
        IntPtr p = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(_handle.Handle);
        return new OfflineStream(p);
    }

    /// <summary>Runs language identification on <paramref name="stream"/>.</summary>
    /// <param name="stream">The stream whose native handle is passed to the native compute call.</param>
    /// <returns>A managed copy of the native result.</returns>
    public SpokenLanguageIdentificationResult Compute(OfflineStream stream)
    {
        IntPtr h = SherpaOnnxSpokenLanguageIdentificationCompute(_handle.Handle, stream.Handle);
        try
        {
            return new SpokenLanguageIdentificationResult(h);
        }
        finally
        {
            // The managed result owns a copy of the data, so the native
            // result is released here - also when the copy above throws,
            // which would otherwise leak it.
            if (h != IntPtr.Zero)
            {
                SherpaOnnxDestroySpokenLanguageIdentificationResult(h);
            }
        }
    }

    /// <summary>Destroys the native object. Calling it more than once is harmless.</summary>
    public void Dispose()
    {
        Cleanup();
        // Prevent the object from being placed on the
        // finalization queue
        System.GC.SuppressFinalize(this);
    }

    ~SpokenLanguageIdentification()
    {
        Cleanup();
    }

    private void Cleanup()
    {
        // Nothing to release: creation never stored a handle, or the handle
        // was already destroyed by an earlier Dispose().
        if (_handle.Handle == IntPtr.Zero)
        {
            return;
        }

        SherpaOnnxDestroySpokenLanguageIdentification(_handle.Handle);

        // Don't permit the handle to be used again.
        _handle = new HandleRef(this, IntPtr.Zero);
    }

    private HandleRef _handle;

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxCreateSpokenLanguageIdentification(ref SpokenLanguageIdentificationConfig config);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentification(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(IntPtr handle);

    [DllImport(Dll.Filename)]
    private static extern IntPtr SherpaOnnxSpokenLanguageIdentificationCompute(IntPtr handle, IntPtr stream);

    [DllImport(Dll.Filename)]
    private static extern void SherpaOnnxDestroySpokenLanguageIdentificationResult(IntPtr handle);
}
}
...
...
请
注册
或
登录
后发表评论