Toggle navigation
Toggle navigation
此项目
正在载入...
Sign in
xuning
/
sherpaonnx
转到一个项目
Toggle navigation
项目
群组
代码片段
帮助
Toggle navigation pinning
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Authored by
Michael Lamothe
2025-01-04 19:39:06 +1100
Browse Files
Options
Browse Files
Download
Email Patches
Plain Diff
Committed by
GitHub
2025-01-04 16:39:06 +0800
Commit
8a60985363b537abdab9df8d32b59bb3d4c9ae8f
8a609853
1 parent
bf3330c9
Upgraded to .NET 8 and made code style a little more internally consistent. (#1680)
隐藏空白字符变更
内嵌
并排对比
正在显示
29 个修改的文件
包含
335 行增加
和
385 行删除
dotnet-examples/Common/Common.csproj
dotnet-examples/Common/WaveHeader.cs
dotnet-examples/keyword-spotting-from-files/Program.cs
dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
dotnet-examples/keyword-spotting-from-microphone/Program.cs
dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
dotnet-examples/offline-decode-files/Program.cs
dotnet-examples/offline-decode-files/offline-decode-files.csproj
dotnet-examples/offline-punctuation/Program.cs
dotnet-examples/offline-punctuation/offline-punctuation.csproj
dotnet-examples/offline-speaker-diarization/Program.cs
dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
dotnet-examples/offline-tts-play/Program.cs
dotnet-examples/offline-tts-play/offline-tts-play.csproj
dotnet-examples/offline-tts/Program.cs
dotnet-examples/offline-tts/offline-tts.csproj
dotnet-examples/online-decode-files/Program.cs
dotnet-examples/online-decode-files/online-decode-files.csproj
dotnet-examples/sherpa-onnx.sln
dotnet-examples/speaker-identification/Program.cs
dotnet-examples/speaker-identification/speaker-identification.csproj
dotnet-examples/speech-recognition-from-microphone/Program.cs
dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
dotnet-examples/spoken-language-identification/Program.cs
dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
dotnet-examples/streaming-hlg-decoding/Program.cs
dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
dotnet-examples/Common/Common.csproj
查看文件 @
8a60985
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
</PropertyGroup>
<ItemGroup>
...
...
dotnet-examples/Common/WaveHeader.cs
查看文件 @
8a60985
...
...
@@ -4,171 +4,166 @@ using System.IO;
using
System.Runtime.InteropServices
;
namespace
SherpaOnnx
{
namespace
SherpaOnnx
;
[
StructLayout
(
LayoutKind
.
Sequential
)]
public
struct
WaveHeader
[StructLayout(LayoutKind.Sequential)]
public
struct
WaveHeader
{
public
int
ChunkID
;
public
int
ChunkSize
;
public
int
Format
;
public
int
SubChunk1ID
;
public
int
SubChunk1Size
;
public
short
AudioFormat
;
public
short
NumChannels
;
public
int
SampleRate
;
public
int
ByteRate
;
public
short
BlockAlign
;
public
short
BitsPerSample
;
public
int
SubChunk2ID
;
public
int
SubChunk2Size
;
public
bool
Validate
()
{
public
Int32
ChunkID
;
public
Int32
ChunkSize
;
public
Int32
Format
;
public
Int32
SubChunk1ID
;
public
Int32
SubChunk1Size
;
public
Int16
AudioFormat
;
public
Int16
NumChannels
;
public
Int32
SampleRate
;
public
Int32
ByteRate
;
public
Int16
BlockAlign
;
public
Int16
BitsPerSample
;
public
Int32
SubChunk2ID
;
public
Int32
SubChunk2Size
;
public
bool
Validate
()
if
(
ChunkID
!=
0x46464952
)
{
Console
.
WriteLine
(
$
"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952"
);
return
false
;
}
// E V A W
if
(
Format
!=
0x45564157
)
{
Console
.
WriteLine
(
$
"Invalid format: 0x{Format:X}. Expect 0x45564157"
);
return
false
;
}
// t m f
if
(
SubChunk1ID
!=
0x20746d66
)
{
Console
.
WriteLine
(
$
"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66"
);
return
false
;
}
if
(
SubChunk1Size
!=
16
)
{
Console
.
WriteLine
(
$
"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16"
);
return
false
;
}
if
(
AudioFormat
!=
1
)
{
Console
.
WriteLine
(
$
"Invalid AudioFormat: {AudioFormat}. Expect 1"
);
return
false
;
}
if
(
NumChannels
!=
1
)
{
Console
.
WriteLine
(
$
"Invalid NumChannels: {NumChannels}. Expect 1"
);
return
false
;
}
if
(
ByteRate
!=
(
SampleRate
*
NumChannels
*
BitsPerSample
/
8
))
{
Console
.
WriteLine
(
$
"Invalid byte rate: {ByteRate}."
);
return
false
;
}
if
(
BlockAlign
!=
(
NumChannels
*
BitsPerSample
/
8
))
{
if
(
ChunkID
!=
0x46464952
)
{
Console
.
WriteLine
(
$
"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952"
);
return
false
;
}
// E V A W
if
(
Format
!=
0x45564157
)
{
Console
.
WriteLine
(
$
"Invalid format: 0x{Format:X}. Expect 0x45564157"
);
return
false
;
}
// t m f
if
(
SubChunk1ID
!=
0x20746d66
)
{
Console
.
WriteLine
(
$
"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66"
);
return
false
;
}
if
(
SubChunk1Size
!=
16
)
{
Console
.
WriteLine
(
$
"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16"
);
return
false
;
}
if
(
AudioFormat
!=
1
)
{
Console
.
WriteLine
(
$
"Invalid AudioFormat: {AudioFormat}. Expect 1"
);
return
false
;
}
if
(
NumChannels
!=
1
)
{
Console
.
WriteLine
(
$
"Invalid NumChannels: {NumChannels}. Expect 1"
);
return
false
;
}
if
(
ByteRate
!=
(
SampleRate
*
NumChannels
*
BitsPerSample
/
8
))
{
Console
.
WriteLine
(
$
"Invalid byte rate: {ByteRate}."
);
return
false
;
}
if
(
BlockAlign
!=
(
NumChannels
*
BitsPerSample
/
8
))
{
Console
.
WriteLine
(
$
"Invalid block align: {ByteRate}."
);
return
false
;
}
if
(
BitsPerSample
!=
16
)
{
// we support only 16 bits per sample
Console
.
WriteLine
(
$
"Invalid bits per sample: {BitsPerSample}. Expect 16"
);
return
false
;
}
return
true
;
Console
.
WriteLine
(
$
"Invalid block align: {ByteRate}."
);
return
false
;
}
if
(
BitsPerSample
!=
16
)
{
// we support only 16 bits per sample
Console
.
WriteLine
(
$
"Invalid bits per sample: {BitsPerSample}. Expect 16"
);
return
false
;
}
return
true
;
}
}
// It supports only 16-bit, single channel WAVE format.
// The sample rate can be any value.
public
class
WaveReader
// It supports only 16-bit, single channel WAVE format.
// The sample rate can be any value.
public
class
WaveReader
{
public
WaveReader
(
string
fileName
)
{
public
WaveReader
(
String
fileName
)
if
(!
File
.
Exists
(
fileName
)
)
{
if
(!
File
.
Exists
(
fileName
))
{
throw
new
ApplicationException
(
$
"{fileName} does not exist!"
);
}
using
(
var
stream
=
File
.
Open
(
fileName
,
FileMode
.
Open
))
{
using
(
var
reader
=
new
BinaryReader
(
stream
))
{
_header
=
ReadHeader
(
reader
);
if
(!
_header
.
Validate
())
{
throw
new
ApplicationException
(
$
"Invalid wave file ${fileName}"
);
}
SkipMetaData
(
reader
);
// now read samples
// _header.SubChunk2Size contains number of bytes in total.
// we assume each sample is of type int16
byte
[]
buffer
=
reader
.
ReadBytes
(
_header
.
SubChunk2Size
);
short
[]
samples_int16
=
new
short
[
_header
.
SubChunk2Size
/
2
];
Buffer
.
BlockCopy
(
buffer
,
0
,
samples_int16
,
0
,
buffer
.
Length
);
_samples
=
new
float
[
samples_int16
.
Length
];
for
(
var
i
=
0
;
i
<
samples_int16
.
Length
;
++
i
)
{
_samples
[
i
]
=
samples_int16
[
i
]
/
32768.0F
;
}
}
}
throw
new
ApplicationException
(
$
"{fileName} does not exist!"
);
}
private
static
WaveHeader
ReadHeader
(
BinaryReader
reader
)
{
byte
[]
bytes
=
reader
.
ReadBytes
(
Marshal
.
SizeOf
(
typeof
(
WaveHeader
)));
using
var
stream
=
File
.
Open
(
fileName
,
FileMode
.
Open
);
using
var
reader
=
new
BinaryReader
(
stream
);
GCHandle
handle
=
GCHandle
.
Alloc
(
bytes
,
GCHandleType
.
Pinned
);
WaveHeader
header
=
(
WaveHeader
)
Marshal
.
PtrToStructure
(
handle
.
AddrOfPinnedObject
(),
typeof
(
WaveHeader
))!;
handle
.
Free
();
_header
=
ReadHeader
(
reader
);
return
header
;
if
(!
_header
.
Validate
())
{
throw
new
ApplicationException
(
$
"Invalid wave file ${fileName}"
);
}
private
void
SkipMetaData
(
BinaryReader
reader
)
SkipMetaData
(
reader
);
// now read samples
// _header.SubChunk2Size contains number of bytes in total.
// we assume each sample is of type int16
var
buffer
=
reader
.
ReadBytes
(
_header
.
SubChunk2Size
);
var
samples_int16
=
new
short
[
_header
.
SubChunk2Size
/
2
];
Buffer
.
BlockCopy
(
buffer
,
0
,
samples_int16
,
0
,
buffer
.
Length
);
_samples
=
new
float
[
samples_int16
.
Length
];
for
(
var
i
=
0
;
i
<
samples_int16
.
Length
;
++
i
)
{
var
bs
=
reader
.
BaseStream
;
Int32
subChunk2ID
=
_header
.
SubChunk2ID
;
Int32
subChunk2Size
=
_header
.
SubChunk2Size
;
while
(
bs
.
Position
!=
bs
.
Length
&&
subChunk2ID
!=
0x61746164
)
{
bs
.
Seek
(
subChunk2Size
,
SeekOrigin
.
Current
);
subChunk2ID
=
reader
.
ReadInt32
();
subChunk2Size
=
reader
.
ReadInt32
();
}
_header
.
SubChunk2ID
=
subChunk2ID
;
_header
.
SubChunk2Size
=
subChunk2Size
;
_samples
[
i
]
=
samples_int16
[
i
]
/
32768.0F
;
}
}
private
WaveHeader
_header
;
private
static
WaveHeader
ReadHeader
(
BinaryReader
reader
)
{
var
bytes
=
reader
.
ReadBytes
(
Marshal
.
SizeOf
(
typeof
(
WaveHeader
)));
GCHandle
handle
=
GCHandle
.
Alloc
(
bytes
,
GCHandleType
.
Pinned
);
WaveHeader
header
=
(
WaveHeader
)
Marshal
.
PtrToStructure
(
handle
.
AddrOfPinnedObject
(),
typeof
(
WaveHeader
))!;
handle
.
Free
();
return
header
;
}
// Samples are normalized to the range [-1, 1]
private
float
[]
_samples
;
private
void
SkipMetaData
(
BinaryReader
reader
)
{
var
bs
=
reader
.
BaseStream
;
public
int
SampleRate
=>
_header
.
SampleRate
;
public
float
[]
Samples
=>
_samples
;
var
subChunk2ID
=
_header
.
SubChunk2ID
;
var
subChunk2Size
=
_header
.
SubChunk2Size
;
public
static
void
Test
(
String
fileName
)
while
(
bs
.
Position
!=
bs
.
Length
&&
subChunk2ID
!=
0x61746164
)
{
WaveReader
reader
=
new
WaveReader
(
fileName
);
Console
.
WriteLine
(
$
"samples length: {reader.Samples.Length}"
);
Console
.
WriteLine
(
$
"samples rate: {reader.SampleRate}"
);
bs
.
Seek
(
subChunk2Size
,
SeekOrigin
.
Current
);
subChunk2ID
=
reader
.
ReadInt32
();
subChunk2Size
=
reader
.
ReadInt32
();
}
_header
.
SubChunk2ID
=
subChunk2ID
;
_header
.
SubChunk2Size
=
subChunk2Size
;
}
private
WaveHeader
_header
;
// Samples are normalized to the range [-1, 1]
private
float
[]
_samples
;
public
int
SampleRate
=>
_header
.
SampleRate
;
public
float
[]
Samples
=>
_samples
;
public
static
void
Test
(
string
fileName
)
{
WaveReader
reader
=
new
WaveReader
(
fileName
);
Console
.
WriteLine
(
$
"samples length: {reader.Samples.Length}"
);
Console
.
WriteLine
(
$
"samples rate: {reader.SampleRate}"
);
}
}
...
...
dotnet-examples/keyword-spotting-from-files/Program.cs
查看文件 @
8a60985
...
...
@@ -13,8 +13,6 @@
// dotnet run
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
KeywordSpotterDemo
{
...
...
@@ -38,11 +36,11 @@ class KeywordSpotterDemo
var
filename
=
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
;
WaveReade
r
waveReader
=
new
WaveReader
(
filename
);
va
r
waveReader
=
new
WaveReader
(
filename
);
Console
.
WriteLine
(
"----------Use pre-defined keywords----------"
);
OnlineStream
s
=
kws
.
CreateStream
();
var
s
=
kws
.
CreateStream
();
s
.
AcceptWaveform
(
waveReader
.
SampleRate
,
waveReader
.
Samples
);
float
[]
tailPadding
=
new
float
[(
int
)(
waveReader
.
SampleRate
*
0.3
)];
...
...
@@ -53,7 +51,7 @@ class KeywordSpotterDemo
{
kws
.
Decode
(
s
);
var
result
=
kws
.
GetResult
(
s
);
if
(
result
.
Keyword
!=
""
)
if
(
result
.
Keyword
!=
string
.
Empty
)
{
Console
.
WriteLine
(
"Detected: {0}"
,
result
.
Keyword
);
}
...
...
@@ -70,7 +68,7 @@ class KeywordSpotterDemo
{
kws
.
Decode
(
s
);
var
result
=
kws
.
GetResult
(
s
);
if
(
result
.
Keyword
!=
""
)
if
(
result
.
Keyword
!=
string
.
Empty
)
{
Console
.
WriteLine
(
"Detected: {0}"
,
result
.
Keyword
);
}
...
...
@@ -89,7 +87,7 @@ class KeywordSpotterDemo
{
kws
.
Decode
(
s
);
var
result
=
kws
.
GetResult
(
s
);
if
(
result
.
Keyword
!=
""
)
if
(
result
.
Keyword
!=
string
.
Empty
)
{
Console
.
WriteLine
(
"Detected: {0}"
,
result
.
Keyword
);
}
...
...
dotnet-examples/keyword-spotting-from-files/keyword-spotting-from-files.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>keyword_spotting_from_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/keyword-spotting-from-microphone/Program.cs
查看文件 @
8a60985
...
...
@@ -12,12 +12,9 @@
//
// dotnet run
using
PortAudioSharp
;
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System.Runtime.InteropServices
;
using
System
;
using
PortAudioSharp
;
class
KeywordSpotterDemo
{
...
...
@@ -41,11 +38,11 @@ class KeywordSpotterDemo
var
filename
=
"./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/test_wavs/3.wav"
;
WaveReade
r
waveReader
=
new
WaveReader
(
filename
);
va
r
waveReader
=
new
WaveReader
(
filename
);
Console
.
WriteLine
(
"----------Use pre-defined keywords----------"
);
OnlineStream
s
=
kws
.
CreateStream
();
var
s
=
kws
.
CreateStream
();
Console
.
WriteLine
(
PortAudio
.
VersionInfo
.
versionText
);
PortAudio
.
Initialize
();
...
...
@@ -54,7 +51,7 @@ class KeywordSpotterDemo
for
(
int
i
=
0
;
i
!=
PortAudio
.
DeviceCount
;
++
i
)
{
Console
.
WriteLine
(
$
" Device {i}"
);
DeviceInfo
deviceInfo
=
PortAudio
.
GetDeviceInfo
(
i
);
var
deviceInfo
=
PortAudio
.
GetDeviceInfo
(
i
);
Console
.
WriteLine
(
$
" Name: {deviceInfo.name}"
);
Console
.
WriteLine
(
$
" Max input channels: {deviceInfo.maxInputChannels}"
);
Console
.
WriteLine
(
$
" Default sample rate: {deviceInfo.defaultSampleRate}"
);
...
...
@@ -66,12 +63,12 @@ class KeywordSpotterDemo
Environment
.
Exit
(
1
);
}
DeviceInfo
info
=
PortAudio
.
GetDeviceInfo
(
deviceIndex
);
var
info
=
PortAudio
.
GetDeviceInfo
(
deviceIndex
);
Console
.
WriteLine
();
Console
.
WriteLine
(
$
"Use default device {deviceIndex} ({info.name})"
);
StreamParameters
param
=
new
StreamParameters
();
var
param
=
new
StreamParameters
();
param
.
device
=
deviceIndex
;
param
.
channelCount
=
1
;
param
.
sampleFormat
=
SampleFormat
.
Float32
;
...
...
@@ -79,21 +76,21 @@ class KeywordSpotterDemo
param
.
hostApiSpecificStreamInfo
=
IntPtr
.
Zero
;
PortAudioSharp
.
Stream
.
Callback
callback
=
(
IntPtr
input
,
IntPtr
output
,
UInt32
frameCount
,
uint
frameCount
,
ref
StreamCallbackTimeInfo
timeInfo
,
StreamCallbackFlags
statusFlags
,
IntPtr
userData
)
=>
{
float
[]
samples
=
new
float
[
frameCount
];
Marshal
.
Copy
(
input
,
samples
,
0
,
(
Int32
)
frameCount
);
var
samples
=
new
float
[
frameCount
];
Marshal
.
Copy
(
input
,
samples
,
0
,
(
int
)
frameCount
);
s
.
AcceptWaveform
(
config
.
FeatConfig
.
SampleRate
,
samples
);
return
StreamCallbackResult
.
Continue
;
};
PortAudioSharp
.
Stream
stream
=
new
PortAudioSharp
.
Stream
(
inParams
:
param
,
outParams
:
null
,
sampleRate
:
config
.
FeatConfig
.
SampleRate
,
var
stream
=
new
PortAudioSharp
.
Stream
(
inParams
:
param
,
outParams
:
null
,
sampleRate
:
config
.
FeatConfig
.
SampleRate
,
framesPerBuffer
:
0
,
streamFlags
:
StreamFlags
.
ClipOff
,
callback
:
callback
,
...
...
@@ -113,15 +110,13 @@ class KeywordSpotterDemo
}
var
result
=
kws
.
GetResult
(
s
);
if
(
result
.
Keyword
!=
""
)
if
(
result
.
Keyword
!=
string
.
Empty
)
{
Console
.
WriteLine
(
"Detected: {0}"
,
result
.
Keyword
);
}
Thread
.
Sleep
(
200
);
// ms
}
PortAudio
.
Terminate
();
}
}
...
...
dotnet-examples/keyword-spotting-from-microphone/keyword-spotting-from-microphone.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>keyword_spotting_from_microphone</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/offline-decode-files/Program.cs
查看文件 @
8a60985
...
...
@@ -5,17 +5,14 @@
// Please refer to
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
// to download non-streaming models
using
CommandLine.Text
;
using
CommandLine
;
using
CommandLine.Text
;
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
OfflineDecodeFiles
{
class
Options
{
[
Option
(
"sample-rate"
,
Required
=
false
,
Default
=
16000
,
HelpText
=
"Sample rate of the data used to train the model"
)]
public
int
SampleRate
{
get
;
set
;
}
=
16000
;
...
...
@@ -23,58 +20,58 @@ class OfflineDecodeFiles
public
int
FeatureDim
{
get
;
set
;
}
=
80
;
[
Option
(
Required
=
false
,
HelpText
=
"Path to tokens.txt"
)]
public
string
Tokens
{
get
;
set
;
}
=
""
;
public
string
Tokens
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to transducer encoder.onnx. Used only for transducer models"
)]
public
string
Encoder
{
get
;
set
;
}
=
""
;
public
string
Encoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to transducer decoder.onnx. Used only for transducer models"
)]
public
string
Decoder
{
get
;
set
;
}
=
""
;
public
string
Decoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to transducer joiner.onnx. Used only for transducer models"
)]
public
string
Joiner
{
get
;
set
;
}
=
""
;
public
string
Joiner
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"model-type"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"model type"
)]
public
string
ModelType
{
get
;
set
;
}
=
""
;
public
string
ModelType
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"whisper-encoder"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to whisper encoder.onnx. Used only for whisper models"
)]
public
string
WhisperEncoder
{
get
;
set
;
}
=
""
;
public
string
WhisperEncoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"whisper-decoder"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to whisper decoder.onnx. Used only for whisper models"
)]
public
string
WhisperDecoder
{
get
;
set
;
}
=
""
;
public
string
WhisperDecoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"whisper-language"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Language of the input file. Can be empty"
)]
public
string
WhisperLanguage
{
get
;
set
;
}
=
""
;
public
string
WhisperLanguage
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"whisper-task"
,
Required
=
false
,
Default
=
"transcribe"
,
HelpText
=
"transcribe or translate"
)]
public
string
WhisperTask
{
get
;
set
;
}
=
"transcribe"
;
[
Option
(
"moonshine-preprocessor"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to preprocess.onnx. Used only for Moonshine models"
)]
public
string
MoonshinePreprocessor
{
get
;
set
;
}
=
""
;
public
string
MoonshinePreprocessor
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"moonshine-encoder"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to encode.onnx. Used only for Moonshine models"
)]
public
string
MoonshineEncoder
{
get
;
set
;
}
=
""
;
public
string
MoonshineEncoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"moonshine-uncached-decoder"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to uncached_decode.onnx. Used only for Moonshine models"
)]
public
string
MoonshineUncachedDecoder
{
get
;
set
;
}
=
""
;
public
string
MoonshineUncachedDecoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"moonshine-cached-decoder"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to cached_decode.onnx. Used only for Moonshine models"
)]
public
string
MoonshineCachedDecoder
{
get
;
set
;
}
=
""
;
public
string
MoonshineCachedDecoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"tdnn-model"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to tdnn yesno model"
)]
public
string
TdnnModel
{
get
;
set
;
}
=
""
;
public
string
TdnnModel
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
HelpText
=
"Path to model.onnx. Used only for paraformer models"
)]
public
string
Paraformer
{
get
;
set
;
}
=
""
;
public
string
Paraformer
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"nemo-ctc"
,
Required
=
false
,
HelpText
=
"Path to model.onnx. Used only for NeMo CTC models"
)]
public
string
NeMoCtc
{
get
;
set
;
}
=
""
;
public
string
NeMoCtc
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"telespeech-ctc"
,
Required
=
false
,
HelpText
=
"Path to model.onnx. Used only for TeleSpeech CTC models"
)]
public
string
TeleSpeechCtc
{
get
;
set
;
}
=
""
;
public
string
TeleSpeechCtc
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"sense-voice-model"
,
Required
=
false
,
HelpText
=
"Path to model.onnx. Used only for SenseVoice CTC models"
)]
public
string
SenseVoiceModel
{
get
;
set
;
}
=
""
;
public
string
SenseVoiceModel
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"sense-voice-use-itn"
,
Required
=
false
,
HelpText
=
"1 to use inverse text normalization for sense voice."
)]
public
int
SenseVoiceUseItn
{
get
;
set
;
}
=
1
;
...
...
@@ -88,7 +85,7 @@ class OfflineDecodeFiles
[
Option
(
"rule-fsts"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"If not empty, path to rule fst for inverse text normalization"
)]
public
string
RuleFsts
{
get
;
set
;
}
=
""
;
public
string
RuleFsts
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"max-active-paths"
,
Required
=
false
,
Default
=
4
,
HelpText
=
@
"Used only when --decoding--method is modified_beam_search.
...
...
@@ -96,7 +93,7 @@ It specifies number of active paths to keep during the search")]
public
int
MaxActivePaths
{
get
;
set
;
}
=
4
;
[
Option
(
"hotwords-file"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to hotwords.txt"
)]
public
string
HotwordsFile
{
get
;
set
;
}
=
""
;
public
string
HotwordsFile
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"hotwords-score"
,
Required
=
false
,
Default
=
1.5F
,
HelpText
=
"hotwords score"
)]
public
float
HotwordsScore
{
get
;
set
;
}
=
1.5F
;
...
...
@@ -117,7 +114,7 @@ It specifies number of active paths to keep during the search")]
private
static
void
DisplayHelp
<
T
>(
ParserResult
<
T
>
result
,
IEnumerable
<
Error
>
errs
)
{
string
usage
=
@
"
var
usage
=
@
"
#
Zipformer
dotnet
run
\
...
...
@@ -213,42 +210,42 @@ to download pre-trained Tdnn models.
config
.
ModelConfig
.
Tokens
=
options
.
Tokens
;
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
Encoder
))
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
Encoder
))
{
// this is a transducer model
config
.
ModelConfig
.
Transducer
.
Encoder
=
options
.
Encoder
;
config
.
ModelConfig
.
Transducer
.
Decoder
=
options
.
Decoder
;
config
.
ModelConfig
.
Transducer
.
Joiner
=
options
.
Joiner
;
}
else
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
Paraformer
))
else
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
Paraformer
))
{
config
.
ModelConfig
.
Paraformer
.
Model
=
options
.
Paraformer
;
}
else
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
NeMoCtc
))
else
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
NeMoCtc
))
{
config
.
ModelConfig
.
NeMoCtc
.
Model
=
options
.
NeMoCtc
;
}
else
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
TeleSpeechCtc
))
else
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
TeleSpeechCtc
))
{
config
.
ModelConfig
.
TeleSpeechCtc
=
options
.
TeleSpeechCtc
;
}
else
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
WhisperEncoder
))
else
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
WhisperEncoder
))
{
config
.
ModelConfig
.
Whisper
.
Encoder
=
options
.
WhisperEncoder
;
config
.
ModelConfig
.
Whisper
.
Decoder
=
options
.
WhisperDecoder
;
config
.
ModelConfig
.
Whisper
.
Language
=
options
.
WhisperLanguage
;
config
.
ModelConfig
.
Whisper
.
Task
=
options
.
WhisperTask
;
}
else
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
TdnnModel
))
else
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
TdnnModel
))
{
config
.
ModelConfig
.
Tdnn
.
Model
=
options
.
TdnnModel
;
}
else
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
SenseVoiceModel
))
else
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
SenseVoiceModel
))
{
config
.
ModelConfig
.
SenseVoice
.
Model
=
options
.
SenseVoiceModel
;
config
.
ModelConfig
.
SenseVoice
.
UseInverseTextNormalization
=
options
.
SenseVoiceUseItn
;
}
else
if
(!
S
tring
.
IsNullOrEmpty
(
options
.
MoonshinePreprocessor
))
else
if
(!
s
tring
.
IsNullOrEmpty
(
options
.
MoonshinePreprocessor
))
{
config
.
ModelConfig
.
Moonshine
.
Preprocessor
=
options
.
MoonshinePreprocessor
;
config
.
ModelConfig
.
Moonshine
.
Encoder
=
options
.
MoonshineEncoder
;
...
...
@@ -270,17 +267,17 @@ to download pre-trained Tdnn models.
config
.
ModelConfig
.
Debug
=
0
;
OfflineRecognize
r
recognizer
=
new
OfflineRecognizer
(
config
);
va
r
recognizer
=
new
OfflineRecognizer
(
config
);
string
[]
files
=
options
.
Files
.
ToArray
();
var
files
=
options
.
Files
.
ToArray
();
// We create a separate stream for each file
List
<
OfflineStream
>
streams
=
new
List
<
OfflineStream
>();
var
streams
=
new
List
<
OfflineStream
>();
streams
.
EnsureCapacity
(
files
.
Length
);
for
(
int
i
=
0
;
i
!=
files
.
Length
;
++
i
)
{
OfflineStream
s
=
recognizer
.
CreateStream
();
var
s
=
recognizer
.
CreateStream
();
WaveReader
waveReader
=
new
WaveReader
(
files
[
i
]);
s
.
AcceptWaveform
(
waveReader
.
SampleRate
,
waveReader
.
Samples
);
...
...
@@ -299,7 +296,7 @@ to download pre-trained Tdnn models.
Console
.
WriteLine
(
"Tokens: [{0}]"
,
string
.
Join
(
", "
,
r
.
Tokens
));
if
(
r
.
Timestamps
!=
null
&&
r
.
Timestamps
.
Length
>
0
)
{
Console
.
Write
(
"Timestamps: ["
);
var
sep
=
""
;
var
sep
=
string
.
Empty
;
for
(
int
k
=
0
;
k
!=
r
.
Timestamps
.
Length
;
++
k
)
{
Console
.
Write
(
"{0}{1}"
,
sep
,
r
.
Timestamps
[
k
].
ToString
(
"0.00"
));
...
...
dotnet-examples/offline-decode-files/offline-decode-files.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>offline_decode_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/offline-punctuation/Program.cs
查看文件 @
8a60985
...
...
@@ -12,8 +12,6 @@
// dotnet run
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
OfflinePunctuationDemo
{
...
...
@@ -25,14 +23,14 @@ class OfflinePunctuationDemo
config
.
Model
.
NumThreads
=
1
;
var
punct
=
new
OfflinePunctuation
(
config
);
string
[]
textList
=
new
string
[]
{
var
textList
=
new
string
[]
{
"这是一个测试你好吗How are you我很好thank you are you ok谢谢你"
,
"我们都是木头人不会说话不会动"
,
"The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry"
,
};
Console
.
WriteLine
(
"---------"
);
foreach
(
string
text
in
textList
)
foreach
(
var
text
in
textList
)
{
string
textWithPunct
=
punct
.
AddPunct
(
text
);
Console
.
WriteLine
(
"Input text: {0}"
,
text
);
...
...
dotnet-examples/offline-punctuation/offline-punctuation.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>offline_punctuation</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/offline-speaker-diarization/Program.cs
查看文件 @
8a60985
...
...
@@ -34,7 +34,6 @@ Step 4. Run it
*/
using
SherpaOnnx
;
using
System
;
class
OfflineSpeakerDiarizationDemo
{
...
...
@@ -54,7 +53,7 @@ class OfflineSpeakerDiarizationDemo
var
sd
=
new
OfflineSpeakerDiarization
(
config
);
var
testWaveFile
=
"./0-four-speakers-zh.wav"
;
WaveReade
r
waveReader
=
new
WaveReader
(
testWaveFile
);
va
r
waveReader
=
new
WaveReader
(
testWaveFile
);
if
(
sd
.
SampleRate
!=
waveReader
.
SampleRate
)
{
Console
.
WriteLine
(
$
"Expected sample rate: {sd.SampleRate}. Given: {waveReader.SampleRate}"
);
...
...
@@ -65,19 +64,19 @@ class OfflineSpeakerDiarizationDemo
// var segments = sd.Process(waveReader.Samples); // this one is also ok
var
MyP
rogressCallback
=
(
int
numProcessedChunks
,
int
numTotalChunks
,
IntPtr
arg
)
=>
var
p
rogressCallback
=
(
int
numProcessedChunks
,
int
numTotalChunks
,
IntPtr
arg
)
=>
{
float
progress
=
100.0F
*
numProcessedChunks
/
numTotalChunks
;
Console
.
WriteLine
(
"Progress {0}%"
,
String
.
Format
(
"{0:0.00}"
,
progress
));
var
progress
=
100.0F
*
numProcessedChunks
/
numTotalChunks
;
Console
.
WriteLine
(
"Progress {0}%"
,
string
.
Format
(
"{0:0.00}"
,
progress
));
return
0
;
};
var
callback
=
new
OfflineSpeakerDiarizationProgressCallback
(
MyP
rogressCallback
);
var
callback
=
new
OfflineSpeakerDiarizationProgressCallback
(
p
rogressCallback
);
var
segments
=
sd
.
ProcessWithCallback
(
waveReader
.
Samples
,
callback
,
IntPtr
.
Zero
);
foreach
(
var
s
in
segments
)
{
Console
.
WriteLine
(
"{0} -- {1} speaker_{2}"
,
String
.
Format
(
"{0:0.00}"
,
s
.
Start
),
S
tring
.
Format
(
"{0:0.00}"
,
s
.
End
),
s
.
Speaker
);
Console
.
WriteLine
(
"{0} -- {1} speaker_{2}"
,
string
.
Format
(
"{0:0.00}"
,
s
.
Start
),
s
tring
.
Format
(
"{0:0.00}"
,
s
.
End
),
s
.
Speaker
);
}
}
}
...
...
dotnet-examples/offline-speaker-diarization/offline-speaker-diarization.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>offline_speaker_diarization</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/offline-tts-play/Program.cs
查看文件 @
8a60985
...
...
@@ -10,15 +10,12 @@
// Note that you need a speaker to run this file since it will play
// the generated audio as it is generating.
using
CommandLine.Text
;
using
CommandLine
;
using
CommandLine.Text
;
using
PortAudioSharp
;
using
SherpaOnnx
;
using
System.Collections.Concurrent
;
using
System.Collections.Generic
;
using
System.Runtime.InteropServices
;
using
System.Threading
;
using
System
;
class
OfflineTtsPlayDemo
{
...
...
@@ -26,13 +23,13 @@ class OfflineTtsPlayDemo
{
[
Option
(
"tts-rule-fsts"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"path to rule.fst"
)]
public
string
RuleFsts
{
get
;
set
;
}
public
string
?
RuleFsts
{
get
;
set
;
}
[
Option
(
"vits-dict-dir"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to the directory containing dict for jieba."
)]
public
string
DictDir
{
get
;
set
;
}
public
string
?
DictDir
{
get
;
set
;
}
[
Option
(
"vits-data-dir"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to the directory containing dict for espeak-ng."
)]
public
string
DataDir
{
get
;
set
;
}
public
string
?
DataDir
{
get
;
set
;
}
[
Option
(
"vits-length-scale"
,
Required
=
false
,
Default
=
1
,
HelpText
=
"speech speed. Larger->Slower; Smaller->faster"
)]
public
float
LengthScale
{
get
;
set
;
}
...
...
@@ -44,10 +41,10 @@ class OfflineTtsPlayDemo
public
float
NoiseScaleW
{
get
;
set
;
}
[
Option
(
"vits-lexicon"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to lexicon.txt"
)]
public
string
Lexicon
{
get
;
set
;
}
public
string
?
Lexicon
{
get
;
set
;
}
[
Option
(
"vits-tokens"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to tokens.txt"
)]
public
string
Tokens
{
get
;
set
;
}
public
string
?
Tokens
{
get
;
set
;
}
[
Option
(
"tts-max-num-sentences"
,
Required
=
false
,
Default
=
1
,
HelpText
=
"Maximum number of sentences that we process at a time."
)]
public
int
MaxNumSentences
{
get
;
set
;
}
...
...
@@ -56,16 +53,16 @@ class OfflineTtsPlayDemo
public
int
Debug
{
get
;
set
;
}
[
Option
(
"vits-model"
,
Required
=
true
,
HelpText
=
"Path to VITS model"
)]
public
string
Model
{
get
;
set
;
}
public
string
?
Model
{
get
;
set
;
}
[
Option
(
"sid"
,
Required
=
false
,
Default
=
0
,
HelpText
=
"Speaker ID"
)]
public
int
SpeakerId
{
get
;
set
;
}
[
Option
(
"text"
,
Required
=
true
,
HelpText
=
"Text to synthesize"
)]
public
string
Text
{
get
;
set
;
}
public
string
?
Text
{
get
;
set
;
}
[
Option
(
"output-filename"
,
Required
=
true
,
Default
=
"./generated.wav"
,
HelpText
=
"Path to save the generated audio"
)]
public
string
OutputFilename
{
get
;
set
;
}
public
string
?
OutputFilename
{
get
;
set
;
}
}
static
void
Main
(
string
[]
args
)
...
...
@@ -124,10 +121,9 @@ to download more models.
Console
.
WriteLine
(
helpText
);
}
private
static
void
Run
(
Options
options
)
{
OfflineTtsConfig
config
=
new
OfflineTtsConfig
();
var
config
=
new
OfflineTtsConfig
();
config
.
Model
.
Vits
.
Model
=
options
.
Model
;
config
.
Model
.
Vits
.
Lexicon
=
options
.
Lexicon
;
config
.
Model
.
Vits
.
Tokens
=
options
.
Tokens
;
...
...
@@ -142,10 +138,9 @@ to download more models.
config
.
RuleFsts
=
options
.
RuleFsts
;
config
.
MaxNumSentences
=
options
.
MaxNumSentences
;
OfflineTts
tts
=
new
OfflineTts
(
config
);
float
speed
=
1.0f
/
options
.
LengthScale
;
int
sid
=
options
.
SpeakerId
;
var
tts
=
new
OfflineTts
(
config
);
var
speed
=
1.0f
/
options
.
LengthScale
;
var
sid
=
options
.
SpeakerId
;
Console
.
WriteLine
(
PortAudio
.
VersionInfo
.
versionText
);
PortAudio
.
Initialize
();
...
...
@@ -166,11 +161,11 @@ to download more models.
Environment
.
Exit
(
1
);
}
DeviceInfo
info
=
PortAudio
.
GetDeviceInfo
(
deviceIndex
);
var
info
=
PortAudio
.
GetDeviceInfo
(
deviceIndex
);
Console
.
WriteLine
();
Console
.
WriteLine
(
$
"Use output default device {deviceIndex} ({info.name})"
);
StreamParameters
param
=
new
StreamParameters
();
var
param
=
new
StreamParameters
();
param
.
device
=
deviceIndex
;
param
.
channelCount
=
1
;
param
.
sampleFormat
=
SampleFormat
.
Float32
;
...
...
@@ -178,7 +173,7 @@ to download more models.
param
.
hostApiSpecificStreamInfo
=
IntPtr
.
Zero
;
// https://learn.microsoft.com/en-us/dotnet/standard/collections/thread-safe/blockingcollection-overview
BlockingCollection
<
float
[
]>
dataItems
=
new
BlockingCollection
<
float
[
]>
();
var
dataItems
=
new
BlockingCollection
<
float
[
]>
();
var
MyCallback
=
(
IntPtr
samples
,
int
n
)
=>
{
...
...
@@ -193,9 +188,9 @@ to download more models.
return
1
;
};
bool
playFinished
=
false
;
var
playFinished
=
false
;
float
[]
lastSampleArray
=
null
;
float
[]
?
lastSampleArray
=
null
;
int
lastIndex
=
0
;
// not played
PortAudioSharp
.
Stream
.
Callback
playCallback
=
(
IntPtr
input
,
IntPtr
output
,
...
...
@@ -270,10 +265,10 @@ to download more models.
stream
.
Start
();
OfflineTtsCallback
callback
=
new
OfflineTtsCallback
(
MyCallback
);
var
callback
=
new
OfflineTtsCallback
(
MyCallback
);
OfflineTtsGeneratedAudio
audio
=
tts
.
GenerateWithCallback
(
options
.
Text
,
speed
,
sid
,
callback
);
bool
ok
=
audio
.
SaveToWaveFile
(
options
.
OutputFilename
);
var
audio
=
tts
.
GenerateWithCallback
(
options
.
Text
,
speed
,
sid
,
callback
);
var
ok
=
audio
.
SaveToWaveFile
(
options
.
OutputFilename
);
if
(
ok
)
{
...
...
dotnet-examples/offline-tts-play/offline-tts-play.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>offline_tts_play</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/offline-tts/Program.cs
查看文件 @
8a60985
...
...
@@ -6,28 +6,25 @@
// and
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// to download pre-trained models
using
CommandLine.Text
;
using
CommandLine
;
using
CommandLine.Text
;
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
OfflineTtsDemo
{
class
Options
{
[
Option
(
"tts-rule-fsts"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"path to rule.fst"
)]
public
string
RuleFsts
{
get
;
set
;
}
=
""
;
public
string
RuleFsts
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"tts-rule-fars"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"path to rule.far"
)]
public
string
RuleFars
{
get
;
set
;
}
=
""
;
public
string
RuleFars
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"vits-dict-dir"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to the directory containing dict for jieba."
)]
public
string
DictDir
{
get
;
set
;
}
=
""
;
public
string
DictDir
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"vits-data-dir"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to the directory containing dict for espeak-ng."
)]
public
string
DataDir
{
get
;
set
;
}
=
""
;
public
string
DataDir
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"vits-length-scale"
,
Required
=
false
,
Default
=
1
,
HelpText
=
"speech speed. Larger->Slower; Smaller->faster"
)]
public
float
LengthScale
{
get
;
set
;
}
=
1
;
...
...
@@ -39,10 +36,10 @@ class OfflineTtsDemo
public
float
NoiseScaleW
{
get
;
set
;
}
=
0.8F
;
[
Option
(
"vits-lexicon"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to lexicon.txt"
)]
public
string
Lexicon
{
get
;
set
;
}
=
""
;
public
string
Lexicon
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"vits-tokens"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to tokens.txt"
)]
public
string
Tokens
{
get
;
set
;
}
=
""
;
public
string
Tokens
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"tts-max-num-sentences"
,
Required
=
false
,
Default
=
1
,
HelpText
=
"Maximum number of sentences that we process at a time."
)]
public
int
MaxNumSentences
{
get
;
set
;
}
=
1
;
...
...
@@ -51,13 +48,13 @@ class OfflineTtsDemo
public
int
Debug
{
get
;
set
;
}
=
0
;
[
Option
(
"vits-model"
,
Required
=
true
,
HelpText
=
"Path to VITS model"
)]
public
string
Model
{
get
;
set
;
}
=
""
;
public
string
Model
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"sid"
,
Required
=
false
,
Default
=
0
,
HelpText
=
"Speaker ID"
)]
public
int
SpeakerId
{
get
;
set
;
}
=
0
;
[
Option
(
"text"
,
Required
=
true
,
HelpText
=
"Text to synthesize"
)]
public
string
Text
{
get
;
set
;
}
=
""
;
public
string
Text
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"output-filename"
,
Required
=
true
,
Default
=
"./generated.wav"
,
HelpText
=
"Path to save the generated audio"
)]
public
string
OutputFilename
{
get
;
set
;
}
=
"./generated.wav"
;
...
...
@@ -65,7 +62,7 @@ class OfflineTtsDemo
static
void
Main
(
string
[]
args
)
{
var
parser
=
new
CommandLine
.
Parser
(
with
=>
with
.
HelpWriter
=
null
);
var
parser
=
new
Parser
(
with
=>
with
.
HelpWriter
=
null
);
var
parserResult
=
parser
.
ParseArguments
<
Options
>(
args
);
parserResult
...
...
@@ -75,7 +72,7 @@ class OfflineTtsDemo
private
static
void
DisplayHelp
<
T
>(
ParserResult
<
T
>
result
,
IEnumerable
<
Error
>
errs
)
{
string
usage
=
@
"
var
usage
=
@
"
#
vits
-
aishell3
curl
-
SL
-
O
https
:
//github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
...
...
@@ -122,7 +119,7 @@ to download more models.
private
static
void
Run
(
Options
options
)
{
OfflineTtsConfig
config
=
new
OfflineTtsConfig
();
var
config
=
new
OfflineTtsConfig
();
config
.
Model
.
Vits
.
Model
=
options
.
Model
;
config
.
Model
.
Vits
.
Lexicon
=
options
.
Lexicon
;
config
.
Model
.
Vits
.
Tokens
=
options
.
Tokens
;
...
...
@@ -138,11 +135,11 @@ to download more models.
config
.
RuleFars
=
options
.
RuleFars
;
config
.
MaxNumSentences
=
options
.
MaxNumSentences
;
OfflineTts
tts
=
new
OfflineTts
(
config
);
float
speed
=
1.0f
/
options
.
LengthScale
;
int
sid
=
options
.
SpeakerId
;
OfflineTtsGeneratedAudio
audio
=
tts
.
Generate
(
options
.
Text
,
speed
,
sid
);
bool
ok
=
audio
.
SaveToWaveFile
(
options
.
OutputFilename
);
var
tts
=
new
OfflineTts
(
config
);
var
speed
=
1.0f
/
options
.
LengthScale
;
var
sid
=
options
.
SpeakerId
;
var
audio
=
tts
.
Generate
(
options
.
Text
,
speed
,
sid
);
var
ok
=
audio
.
SaveToWaveFile
(
options
.
OutputFilename
);
if
(
ok
)
{
...
...
dotnet-examples/offline-tts/offline-tts.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>offline_tts</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/online-decode-files/Program.cs
查看文件 @
8a60985
...
...
@@ -6,40 +6,37 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models
using
CommandLine.Text
;
using
CommandLine
;
using
CommandLine.Text
;
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System.Linq
;
using
System
;
class
OnlineDecodeFiles
{
class
Options
{
[
Option
(
Required
=
true
,
HelpText
=
"Path to tokens.txt"
)]
public
string
Tokens
{
get
;
set
;
}
=
""
;
public
string
Tokens
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
Default
=
"cpu"
,
HelpText
=
"Provider, e.g., cpu, coreml"
)]
public
string
Provider
{
get
;
set
;
}
=
""
;
public
string
Provider
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
HelpText
=
"Path to transducer encoder.onnx"
)]
public
string
Encoder
{
get
;
set
;
}
=
""
;
public
string
Encoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
HelpText
=
"Path to transducer decoder.onnx"
)]
public
string
Decoder
{
get
;
set
;
}
=
""
;
public
string
Decoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
Required
=
false
,
HelpText
=
"Path to transducer joiner.onnx"
)]
public
string
Joiner
{
get
;
set
;
}
=
""
;
public
string
Joiner
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"paraformer-encoder"
,
Required
=
false
,
HelpText
=
"Path to paraformer encoder.onnx"
)]
public
string
ParaformerEncoder
{
get
;
set
;
}
=
""
;
public
string
ParaformerEncoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"paraformer-decoder"
,
Required
=
false
,
HelpText
=
"Path to paraformer decoder.onnx"
)]
public
string
ParaformerDecoder
{
get
;
set
;
}
=
""
;
public
string
ParaformerDecoder
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"zipformer2-ctc"
,
Required
=
false
,
HelpText
=
"Path to zipformer2 CTC onnx model"
)]
public
string
Zipformer2Ctc
{
get
;
set
;
}
=
""
;
public
string
Zipformer2Ctc
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"num-threads"
,
Required
=
false
,
Default
=
1
,
HelpText
=
"Number of threads for computation"
)]
public
int
NumThreads
{
get
;
set
;
}
=
1
;
...
...
@@ -80,15 +77,14 @@ larger than this value. Used only when --enable-endpoint is true.")]
public
float
Rule3MinUtteranceLength
{
get
;
set
;
}
=
20.0F
;
[
Option
(
"hotwords-file"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"Path to hotwords.txt"
)]
public
string
HotwordsFile
{
get
;
set
;
}
=
""
;
public
string
HotwordsFile
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"hotwords-score"
,
Required
=
false
,
Default
=
1.5F
,
HelpText
=
"hotwords score"
)]
public
float
HotwordsScore
{
get
;
set
;
}
=
1.5F
;
[
Option
(
"rule-fsts"
,
Required
=
false
,
Default
=
""
,
HelpText
=
"If not empty, path to rule fst for inverse text normalization"
)]
public
string
RuleFsts
{
get
;
set
;
}
=
""
;
public
string
RuleFsts
{
get
;
set
;
}
=
string
.
Empty
;
[
Option
(
"files"
,
Required
=
true
,
HelpText
=
"Audio files for decoding"
)]
public
IEnumerable
<
string
>
Files
{
get
;
set
;
}
=
new
string
[]
{};
...
...
@@ -162,7 +158,7 @@ to download pre-trained streaming models.
private
static
void
Run
(
Options
options
)
{
OnlineRecognizerConfig
config
=
new
OnlineRecognizerConfig
();
var
config
=
new
OnlineRecognizerConfig
();
config
.
FeatConfig
.
SampleRate
=
options
.
SampleRate
;
// All models from icefall using feature dim 80.
...
...
@@ -194,22 +190,22 @@ to download pre-trained streaming models.
config
.
HotwordsScore
=
options
.
HotwordsScore
;
config
.
RuleFsts
=
options
.
RuleFsts
;
OnlineRecognize
r
recognizer
=
new
OnlineRecognizer
(
config
);
va
r
recognizer
=
new
OnlineRecognizer
(
config
);
string
[]
files
=
options
.
Files
.
ToArray
();
var
files
=
options
.
Files
.
ToArray
();
// We create a separate stream for each file
List
<
OnlineStream
>
streams
=
new
List
<
OnlineStream
>();
var
streams
=
new
List
<
OnlineStream
>();
streams
.
EnsureCapacity
(
files
.
Length
);
for
(
int
i
=
0
;
i
!=
files
.
Length
;
++
i
)
{
OnlineStream
s
=
recognizer
.
CreateStream
();
var
s
=
recognizer
.
CreateStream
();
WaveReade
r
waveReader
=
new
WaveReader
(
files
[
i
]);
va
r
waveReader
=
new
WaveReader
(
files
[
i
]);
s
.
AcceptWaveform
(
waveReader
.
SampleRate
,
waveReader
.
Samples
);
float
[]
tailPadding
=
new
float
[(
int
)(
waveReader
.
SampleRate
*
0.3
)];
var
tailPadding
=
new
float
[(
int
)(
waveReader
.
SampleRate
*
0.3
)];
s
.
AcceptWaveform
(
waveReader
.
SampleRate
,
tailPadding
);
s
.
InputFinished
();
...
...
@@ -230,7 +226,7 @@ to download pre-trained streaming models.
// display results
for
(
int
i
=
0
;
i
!=
files
.
Length
;
++
i
)
{
OnlineRecognizerResult
r
=
recognizer
.
GetResult
(
streams
[
i
]);
var
r
=
recognizer
.
GetResult
(
streams
[
i
]);
var
text
=
r
.
Text
;
var
tokens
=
r
.
Tokens
;
Console
.
WriteLine
(
"--------------------"
);
...
...
@@ -238,7 +234,7 @@ to download pre-trained streaming models.
Console
.
WriteLine
(
"text: {0}"
,
text
);
Console
.
WriteLine
(
"tokens: [{0}]"
,
string
.
Join
(
", "
,
tokens
));
Console
.
Write
(
"timestamps: ["
);
r
.
Timestamps
.
ToList
().
ForEach
(
i
=>
Console
.
Write
(
S
tring
.
Format
(
"{0:0.00}"
,
i
)
+
", "
));
r
.
Timestamps
.
ToList
().
ForEach
(
i
=>
Console
.
Write
(
s
tring
.
Format
(
"{0:0.00}"
,
i
)
+
", "
));
Console
.
WriteLine
(
"]"
);
}
Console
.
WriteLine
(
"--------------------"
);
...
...
dotnet-examples/online-decode-files/online-decode-files.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>online_decode_files</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/sherpa-onnx.sln
查看文件 @
8a60985
...
...
@@ -29,9 +29,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-files
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "keyword-spotting-from-microphone", "keyword-spotting-from-microphone\keyword-spotting-from-microphone.csproj", "{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TTS", "TTS\TTS.csproj", "{DACE4A18-4FC8-4437-92BF-5A90BA81286C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "offline-speaker-diarization", "offline-speaker-diarization\offline-speaker-diarization.csproj", "{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
...
...
@@ -91,10 +89,6 @@ Global
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
{AEE0ED2B-C86F-4952-863C-EAD3219CB4EC}.Release|Any CPU.Build.0 = Release|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{DACE4A18-4FC8-4437-92BF-5A90BA81286C}.Release|Any CPU.Build.0 = Release|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D3A1FF28-A77D-429D-AEAC-2BA77CA682BC}.Release|Any CPU.ActiveCfg = Release|Any CPU
...
...
dotnet-examples/speaker-identification/Program.cs
查看文件 @
8a60985
...
...
@@ -16,20 +16,18 @@
// dotnet run
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
SpeakerIdentificationDemo
{
public
static
float
[]
ComputeEmbedding
(
SpeakerEmbeddingExtractor
extractor
,
S
tring
filename
)
public
static
float
[]
ComputeEmbedding
(
SpeakerEmbeddingExtractor
extractor
,
s
tring
filename
)
{
WaveReade
r
reader
=
new
WaveReader
(
filename
);
va
r
reader
=
new
WaveReader
(
filename
);
OnlineStream
stream
=
extractor
.
CreateStream
();
var
stream
=
extractor
.
CreateStream
();
stream
.
AcceptWaveform
(
reader
.
SampleRate
,
reader
.
Samples
);
stream
.
InputFinished
();
float
[]
embedding
=
extractor
.
Compute
(
stream
);
var
embedding
=
extractor
.
Compute
(
stream
);
return
embedding
;
}
...
...
@@ -43,25 +41,25 @@ class SpeakerIdentificationDemo
var
manager
=
new
SpeakerEmbeddingManager
(
extractor
.
Dim
);
string
[]
spk1Files
=
var
spk1Files
=
new
string
[]
{
"./sr-data/enroll/fangjun-sr-1.wav"
,
"./sr-data/enroll/fangjun-sr-2.wav"
,
"./sr-data/enroll/fangjun-sr-3.wav"
,
};
float
[][]
spk1Vec
=
new
float
[
spk1Files
.
Length
][];
var
spk1Vec
=
new
float
[
spk1Files
.
Length
][];
for
(
int
i
=
0
;
i
<
spk1Files
.
Length
;
++
i
)
{
spk1Vec
[
i
]
=
ComputeEmbedding
(
extractor
,
spk1Files
[
i
]);
}
string
[]
spk2Files
=
var
spk2Files
=
new
string
[]
{
"./sr-data/enroll/leijun-sr-1.wav"
,
"./sr-data/enroll/leijun-sr-2.wav"
,
};
float
[][]
spk2Vec
=
new
float
[
spk2Files
.
Length
][];
var
spk2Vec
=
new
float
[
spk2Files
.
Length
][];
for
(
int
i
=
0
;
i
<
spk2Files
.
Length
;
++
i
)
{
...
...
@@ -100,14 +98,14 @@ class SpeakerIdentificationDemo
Console
.
WriteLine
(
"---All speakers---"
);
string
[]
allSpeakers
=
manager
.
GetAllSpeakers
();
var
allSpeakers
=
manager
.
GetAllSpeakers
();
foreach
(
var
s
in
allSpeakers
)
{
Console
.
WriteLine
(
s
);
}
Console
.
WriteLine
(
"------------"
);
string
[]
testFiles
=
var
testFiles
=
new
string
[]
{
"./sr-data/test/fangjun-test-sr-1.wav"
,
"./sr-data/test/leijun-test-sr-1.wav"
,
...
...
@@ -117,9 +115,9 @@ class SpeakerIdentificationDemo
float
threshold
=
0.6f
;
foreach
(
var
file
in
testFiles
)
{
float
[]
embedding
=
ComputeEmbedding
(
extractor
,
file
);
var
embedding
=
ComputeEmbedding
(
extractor
,
file
);
String
name
=
manager
.
Search
(
embedding
,
threshold
);
var
name
=
manager
.
Search
(
embedding
,
threshold
);
if
(
name
==
""
)
{
name
=
"<Unknown>"
;
...
...
dotnet-examples/speaker-identification/speaker-identification.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>speaker_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/speech-recognition-from-microphone/Program.cs
查看文件 @
8a60985
...
...
@@ -6,47 +6,43 @@
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html
// to download streaming models
using
CommandLine.Text
;
using
CommandLine
;
using
CommandLine.Text
;
using
PortAudioSharp
;
using
System.Threading
;
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System.Runtime.InteropServices
;
using
System
;
class
SpeechRecognitionFromMicrophone
{
class
Options
{
[
Option
(
Required
=
true
,
HelpText
=
"Path to tokens.txt"
)]
public
string
Tokens
{
get
;
set
;
}
public
string
?
Tokens
{
get
;
set
;
}
[
Option
(
Required
=
false
,
Default
=
"cpu"
,
HelpText
=
"Provider, e.g., cpu, coreml"
)]
public
string
Provider
{
get
;
set
;
}
public
string
?
Provider
{
get
;
set
;
}
[
Option
(
Required
=
false
,
HelpText
=
"Path to transducer encoder.onnx"
)]
public
string
Encoder
{
get
;
set
;
}
public
string
?
Encoder
{
get
;
set
;
}
[
Option
(
Required
=
false
,
HelpText
=
"Path to transducer decoder.onnx"
)]
public
string
Decoder
{
get
;
set
;
}
public
string
?
Decoder
{
get
;
set
;
}
[
Option
(
Required
=
false
,
HelpText
=
"Path to transducer joiner.onnx"
)]
public
string
Joiner
{
get
;
set
;
}
public
string
?
Joiner
{
get
;
set
;
}
[
Option
(
"paraformer-encoder"
,
Required
=
false
,
HelpText
=
"Path to paraformer encoder.onnx"
)]
public
string
ParaformerEncoder
{
get
;
set
;
}
public
string
?
ParaformerEncoder
{
get
;
set
;
}
[
Option
(
"paraformer-decoder"
,
Required
=
false
,
HelpText
=
"Path to paraformer decoder.onnx"
)]
public
string
ParaformerDecoder
{
get
;
set
;
}
public
string
?
ParaformerDecoder
{
get
;
set
;
}
[
Option
(
"num-threads"
,
Required
=
false
,
Default
=
1
,
HelpText
=
"Number of threads for computation"
)]
public
int
NumThreads
{
get
;
set
;
}
[
Option
(
"decoding-method"
,
Required
=
false
,
Default
=
"greedy_search"
,
HelpText
=
"Valid decoding methods are: greedy_search, modified_beam_search"
)]
public
string
DecodingMethod
{
get
;
set
;
}
public
string
?
DecodingMethod
{
get
;
set
;
}
[
Option
(
Required
=
false
,
Default
=
false
,
HelpText
=
"True to show model info during loading"
)]
public
bool
Debug
{
get
;
set
;
}
...
...
@@ -126,7 +122,7 @@ to download pre-trained streaming models.
private
static
void
Run
(
Options
options
)
{
OnlineRecognizerConfig
config
=
new
OnlineRecognizerConfig
();
var
config
=
new
OnlineRecognizerConfig
();
config
.
FeatConfig
.
SampleRate
=
options
.
SampleRate
;
// All models from icefall using feature dim 80.
...
...
@@ -153,9 +149,9 @@ to download pre-trained streaming models.
config
.
Rule2MinTrailingSilence
=
options
.
Rule2MinTrailingSilence
;
config
.
Rule3MinUtteranceLength
=
options
.
Rule3MinUtteranceLength
;
OnlineRecognize
r
recognizer
=
new
OnlineRecognizer
(
config
);
va
r
recognizer
=
new
OnlineRecognizer
(
config
);
OnlineStream
s
=
recognizer
.
CreateStream
();
var
s
=
recognizer
.
CreateStream
();
Console
.
WriteLine
(
PortAudio
.
VersionInfo
.
versionText
);
PortAudio
.
Initialize
();
...
...
@@ -176,12 +172,12 @@ to download pre-trained streaming models.
Environment
.
Exit
(
1
);
}
DeviceInfo
info
=
PortAudio
.
GetDeviceInfo
(
deviceIndex
);
var
info
=
PortAudio
.
GetDeviceInfo
(
deviceIndex
);
Console
.
WriteLine
();
Console
.
WriteLine
(
$
"Use default device {deviceIndex} ({info.name})"
);
StreamParameters
param
=
new
StreamParameters
();
var
param
=
new
StreamParameters
();
param
.
device
=
deviceIndex
;
param
.
channelCount
=
1
;
param
.
sampleFormat
=
SampleFormat
.
Float32
;
...
...
@@ -189,14 +185,14 @@ to download pre-trained streaming models.
param
.
hostApiSpecificStreamInfo
=
IntPtr
.
Zero
;
PortAudioSharp
.
Stream
.
Callback
callback
=
(
IntPtr
input
,
IntPtr
output
,
UInt32
frameCount
,
uint
frameCount
,
ref
StreamCallbackTimeInfo
timeInfo
,
StreamCallbackFlags
statusFlags
,
IntPtr
userData
)
=>
{
float
[]
samples
=
new
float
[
frameCount
];
Marshal
.
Copy
(
input
,
samples
,
0
,
(
Int32
)
frameCount
);
var
samples
=
new
float
[
frameCount
];
Marshal
.
Copy
(
input
,
samples
,
0
,
(
int
)
frameCount
);
s
.
AcceptWaveform
(
options
.
SampleRate
,
samples
);
...
...
@@ -215,7 +211,7 @@ to download pre-trained streaming models.
stream
.
Start
();
String
lastText
=
""
;
var
lastText
=
string
.
Empty
;
int
segmentIndex
=
0
;
while
(
true
)
...
...
@@ -245,9 +241,5 @@ to download pre-trained streaming models.
Thread
.
Sleep
(
200
);
// ms
}
PortAudio
.
Terminate
();
}
}
...
...
dotnet-examples/speech-recognition-from-microphone/speech-recognition-from-microphone.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>speech_recognition_from_microphone</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/spoken-language-identification/Program.cs
查看文件 @
8a60985
...
...
@@ -15,12 +15,9 @@
// dotnet run
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
SpokenLanguageIdentificationDemo
{
static
void
Main
(
string
[]
args
)
{
var
config
=
new
SpokenLanguageIdentificationConfig
();
...
...
@@ -30,7 +27,7 @@ class SpokenLanguageIdentificationDemo
var
slid
=
new
SpokenLanguageIdentification
(
config
);
var
filename
=
"./sherpa-onnx-whisper-tiny/test_wavs/0.wav"
;
WaveReade
r
waveReader
=
new
WaveReader
(
filename
);
va
r
waveReader
=
new
WaveReader
(
filename
);
var
s
=
slid
.
CreateStream
();
s
.
AcceptWaveform
(
waveReader
.
SampleRate
,
waveReader
.
Samples
);
...
...
dotnet-examples/spoken-language-identification/spoken-language-identification.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>spoken_language_identification</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/streaming-hlg-decoding/Program.cs
查看文件 @
8a60985
...
...
@@ -13,12 +13,9 @@
// dotnet run
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
StreamingHlgDecodingDemo
{
static
void
Main
(
string
[]
args
)
{
var
config
=
new
OnlineRecognizerConfig
();
...
...
@@ -32,15 +29,15 @@ class StreamingHlgDecodingDemo
config
.
ModelConfig
.
Debug
=
0
;
config
.
CtcFstDecoderConfig
.
Graph
=
"./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"
;
OnlineRecognize
r
recognizer
=
new
OnlineRecognizer
(
config
);
va
r
recognizer
=
new
OnlineRecognizer
(
config
);
var
filename
=
"./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
;
WaveReader
waveReader
=
new
WaveReader
(
filename
);
OnlineStream
s
=
recognizer
.
CreateStream
();
var
waveReader
=
new
WaveReader
(
filename
);
var
s
=
recognizer
.
CreateStream
();
s
.
AcceptWaveform
(
waveReader
.
SampleRate
,
waveReader
.
Samples
);
float
[]
tailPadding
=
new
float
[(
int
)(
waveReader
.
SampleRate
*
0.3
)];
var
tailPadding
=
new
float
[(
int
)(
waveReader
.
SampleRate
*
0.3
)];
s
.
AcceptWaveform
(
waveReader
.
SampleRate
,
tailPadding
);
s
.
InputFinished
();
...
...
@@ -49,7 +46,7 @@ class StreamingHlgDecodingDemo
recognizer
.
Decode
(
s
);
}
OnlineRecognizerResult
r
=
recognizer
.
GetResult
(
s
);
var
r
=
recognizer
.
GetResult
(
s
);
var
text
=
r
.
Text
;
var
tokens
=
r
.
Tokens
;
Console
.
WriteLine
(
"--------------------"
);
...
...
@@ -57,10 +54,8 @@ class StreamingHlgDecodingDemo
Console
.
WriteLine
(
"text: {0}"
,
text
);
Console
.
WriteLine
(
"tokens: [{0}]"
,
string
.
Join
(
", "
,
tokens
));
Console
.
Write
(
"timestamps: ["
);
r
.
Timestamps
.
ToList
().
ForEach
(
i
=>
Console
.
Write
(
S
tring
.
Format
(
"{0:0.00}"
,
i
)
+
", "
));
r
.
Timestamps
.
ToList
().
ForEach
(
i
=>
Console
.
Write
(
s
tring
.
Format
(
"{0:0.00}"
,
i
)
+
", "
));
Console
.
WriteLine
(
"]"
);
Console
.
WriteLine
(
"--------------------"
);
}
}
...
...
dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>streaming_hlg_decoding</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
查看文件 @
8a60985
...
...
@@ -3,8 +3,6 @@
// This file shows how to use a silero_vad model with a non-streaming Paraformer
// for speech recognition.
using
SherpaOnnx
;
using
System.Collections.Generic
;
using
System
;
class
VadNonStreamingAsrParaformer
{
...
...
@@ -12,45 +10,49 @@ class VadNonStreamingAsrParaformer
{
// please download model files from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
OfflineRecognizerConfig
config
=
new
OfflineRecognizerConfig
();
var
config
=
new
OfflineRecognizerConfig
();
config
.
ModelConfig
.
Paraformer
.
Model
=
"./sherpa-onnx-paraformer-zh-2023-09-14/model.int8.onnx"
;
config
.
ModelConfig
.
Tokens
=
"./sherpa-onnx-paraformer-zh-2023-09-14/tokens.txt"
;
config
.
ModelConfig
.
Debug
=
0
;
OfflineRecognize
r
recognizer
=
new
OfflineRecognizer
(
config
);
va
r
recognizer
=
new
OfflineRecognizer
(
config
);
VadModelConfig
vadModelConfig
=
new
VadModelConfig
();
var
vadModelConfig
=
new
VadModelConfig
();
vadModelConfig
.
SileroVad
.
Model
=
"./silero_vad.onnx"
;
vadModelConfig
.
Debug
=
0
;
VoiceActivityDetecto
r
vad
=
new
VoiceActivityDetector
(
vadModelConfig
,
60
);
va
r
vad
=
new
VoiceActivityDetector
(
vadModelConfig
,
60
);
string
testWaveFilename
=
"./lei-jun-test.wav"
;
WaveReader
reader
=
new
WaveReader
(
testWaveFilename
);
var
testWaveFilename
=
"./lei-jun-test.wav"
;
var
reader
=
new
WaveReader
(
testWaveFilename
);
int
numSamples
=
reader
.
Samples
.
Length
;
int
windowSize
=
vadModelConfig
.
SileroVad
.
WindowSize
;
int
sampleRate
=
vadModelConfig
.
SampleRate
;
int
numIter
=
numSamples
/
windowSize
;
for
(
int
i
=
0
;
i
!=
numIter
;
++
i
)
{
for
(
int
i
=
0
;
i
!=
numIter
;
++
i
)
{
int
start
=
i
*
windowSize
;
float
[]
samples
=
new
float
[
windowSize
];
var
samples
=
new
float
[
windowSize
];
Array
.
Copy
(
reader
.
Samples
,
start
,
samples
,
0
,
windowSize
);
vad
.
AcceptWaveform
(
samples
);
if
(
vad
.
IsSpeechDetected
())
{
while
(!
vad
.
IsEmpty
())
{
if
(
vad
.
IsSpeechDetected
())
{
while
(!
vad
.
IsEmpty
())
{
SpeechSegment
segment
=
vad
.
Front
();
float
startTime
=
segment
.
Start
/
(
float
)
sampleRate
;
float
duration
=
segment
.
Samples
.
Length
/
(
float
)
sampleRate
;
var
startTime
=
segment
.
Start
/
(
float
)
sampleRate
;
var
duration
=
segment
.
Samples
.
Length
/
(
float
)
sampleRate
;
OfflineStream
stream
=
recognizer
.
CreateStream
();
stream
.
AcceptWaveform
(
sampleRate
,
segment
.
Samples
);
recognizer
.
Decode
(
stream
);
String
text
=
stream
.
Result
.
Text
;
var
text
=
stream
.
Result
.
Text
;
if
(!
String
.
IsNullOrEmpty
(
text
))
{
Console
.
WriteLine
(
"{0}--{1}: {2}"
,
String
.
Format
(
"{0:0.00}"
,
startTime
),
String
.
Format
(
"{0:0.00}"
,
startTime
+
duration
),
text
);
if
(!
string
.
IsNullOrEmpty
(
text
))
{
Console
.
WriteLine
(
"{0}--{1}: {2}"
,
string
.
Format
(
"{0:0.00}"
,
startTime
),
string
.
Format
(
"{0:0.00}"
,
startTime
+
duration
),
text
);
}
vad
.
Pop
();
...
...
@@ -60,19 +62,21 @@ class VadNonStreamingAsrParaformer
vad
.
Flush
();
while
(!
vad
.
IsEmpty
())
{
SpeechSegment
segment
=
vad
.
Front
();
while
(!
vad
.
IsEmpty
())
{
var
segment
=
vad
.
Front
();
float
startTime
=
segment
.
Start
/
(
float
)
sampleRate
;
float
duration
=
segment
.
Samples
.
Length
/
(
float
)
sampleRate
;
OfflineStream
stream
=
recognizer
.
CreateStream
();
var
stream
=
recognizer
.
CreateStream
();
stream
.
AcceptWaveform
(
sampleRate
,
segment
.
Samples
);
recognizer
.
Decode
(
stream
);
String
text
=
stream
.
Result
.
Text
;
var
text
=
stream
.
Result
.
Text
;
if
(!
String
.
IsNullOrEmpty
(
text
))
{
Console
.
WriteLine
(
"{0}--{1}: {2}"
,
String
.
Format
(
"{0:0.00}"
,
startTime
),
String
.
Format
(
"{0:0.00}"
,
startTime
+
duration
),
text
);
if
(!
string
.
IsNullOrEmpty
(
text
))
{
Console
.
WriteLine
(
"{0}--{1}: {2}"
,
string
.
Format
(
"{0:0.00}"
,
startTime
),
string
.
Format
(
"{0:0.00}"
,
startTime
+
duration
),
text
);
}
vad
.
Pop
();
...
...
dotnet-examples/vad-non-streaming-asr-paraformer/vad-non-streaming-asr-paraformer.csproj
查看文件 @
8a60985
...
...
@@ -2,7 +2,7 @@
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net
6
.0</TargetFramework>
<TargetFramework>net
8
.0</TargetFramework>
<RootNamespace>vad_non_streaming_asr_paraformer</RootNamespace>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
...
...
请
注册
或
登录
后发表评论