Fangjun Kuang
Committed by GitHub

Fix various language binding APIs for tdnn and whisper models (#278)

... ... @@ -67,7 +67,7 @@ jobs:
ls -lh
go mod tidy
cat go.mod
go build -x
go build
ls -lh
git lfs install
... ... @@ -87,6 +87,19 @@ jobs:
./run-nemo-ctc.sh
rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
echo "Test Whisper tiny.en"
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
cd sherpa-onnx-whisper-tiny.en
git lfs pull --include "*.onnx"
cd ..
./run-whisper.sh
rm -rf sherpa-onnx-whisper-tiny.en
echo "Test Tdnn yesno"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
- name: Test non-streaming decoding files (Win64)
if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
shell: bash
... ... @@ -121,6 +134,19 @@ jobs:
./run-nemo-ctc.sh
rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
echo "Test Whisper tiny.en"
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
cd sherpa-onnx-whisper-tiny.en
git lfs pull --include "*.onnx"
cd ..
./run-whisper.sh
rm -rf sherpa-onnx-whisper-tiny.en
echo "Test Tdnn yesno"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
- name: Test non-streaming decoding files (Win32)
if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
shell: bash
... ... @@ -139,7 +165,7 @@ jobs:
go env
go clean
go build -x
go build
echo $PWD
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
... ... @@ -163,6 +189,19 @@ jobs:
./run-nemo-ctc.sh
rm -rf sherpa-onnx-nemo-ctc-en-conformer-medium
echo "Test Whisper tiny.en"
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
cd sherpa-onnx-whisper-tiny.en
git lfs pull --include "*.onnx"
cd ..
./run-whisper.sh
rm -rf sherpa-onnx-whisper-tiny.en
echo "Test Tdnn yesno"
git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-tdnn-yesno
- name: Test streaming decoding files (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
... ... @@ -171,7 +210,7 @@ jobs:
ls -lh
go mod tidy
cat go.mod
go build -x
go build
ls -lh
git lfs install
... ... @@ -233,7 +272,7 @@ jobs:
go env
go clean
go build -x
go build
echo $PWD
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
... ...
... ... @@ -72,3 +72,5 @@ jobs:
./run-nemo-ctc.sh
./run-paraformer.sh
./run-zipformer.sh
./run-whisper.sh
./run-tdnn-yesno.sh
... ...
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
project(sherpa-onnx)

# Single source of truth for the release version.
# (A stale duplicate set() of 1.7.7 was removed; the first assignment
# was dead since the second immediately overwrote it.)
set(SHERPA_ONNX_VERSION "1.7.8")
# Disable warning about
#
... ...
... ... @@ -15,18 +15,35 @@ class OfflineDecodeFiles
{
class Options
{
[Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")]
public int SampleRate { get; set; }
[Option("feat-dim", Required = false, Default = 80, HelpText = "Dimension of the features used to train the model")]
public int FeatureDim { get; set; }
[Option(Required = false, HelpText = "Path to tokens.txt")]
public string Tokens { get; set; }
[Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")]
[Option(Required = false, Default = "", HelpText = "Path to transducer encoder.onnx. Used only for transducer models")]
public string Encoder { get; set; }
[Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")]
[Option(Required = false, Default = "", HelpText = "Path to transducer decoder.onnx. Used only for transducer models")]
public string Decoder { get; set; }
[Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")]
[Option(Required = false, Default = "",HelpText = "Path to transducer joiner.onnx. Used only for transducer models")]
public string Joiner { get; set; }
[Option("whisper-encoder", Required = false, Default = "", HelpText = "Path to whisper encoder.onnx. Used only for whisper models")]
public string WhisperEncoder { get; set; }
[Option("whisper-decoder", Required = false, Default = "", HelpText = "Path to whisper decoder.onnx. Used only for whisper models")]
public string WhisperDecoder { get; set; }
[Option("tdnn-model", Required = false, Default = "", HelpText = "Path to tdnn yesno model")]
public string TdnnModel { get; set; }
[Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")]
public string Paraformer { get; set; }
... ... @@ -105,6 +122,38 @@ dotnet run \
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html
to download pre-trained paraformer models
# Whisper
dotnet run \
--whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
--whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
--tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
--files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
to download pre-trained whisper models.
# Tdnn yesno
dotnet run \
--sample-rate=8000 \
--feat-dim=23 \
--tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
--tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
--files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav
Please refer to
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html
to download pre-trained Tdnn models.
";
var helpText = HelpText.AutoBuild(result, h =>
... ... @@ -120,6 +169,9 @@ to download pre-trained paraformer models
private static void Run(Options options)
{
OfflineRecognizerConfig config = new OfflineRecognizerConfig();
config.FeatConfig.SampleRate = options.SampleRate;
config.FeatConfig.FeatureDim = options.FeatureDim;
config.ModelConfig.Tokens = options.Tokens;
if (!String.IsNullOrEmpty(options.Encoder))
... ... @@ -137,6 +189,15 @@ to download pre-trained paraformer models
{
config.ModelConfig.NeMoCtc.Model = options.NeMoCtc;
}
else if (!String.IsNullOrEmpty(options.WhisperEncoder))
{
config.ModelConfig.Whisper.Encoder = options.WhisperEncoder;
config.ModelConfig.Whisper.Decoder = options.WhisperDecoder;
}
else if (!String.IsNullOrEmpty(options.TdnnModel))
{
config.ModelConfig.Tdnn.Model = options.TdnnModel;
}
else
{
Console.WriteLine("Please provide a model");
... ...
#!/usr/bin/env bash

# Fetch the tdnn yesno model on first run (clone skips the LFS blobs,
# then only the *.onnx files are pulled), and decode the bundled
# test waves with the dotnet example.

if [ ! -d ./sherpa-onnx-tdnn-yesno ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-tdnn-yesno
  (
    cd sherpa-onnx-tdnn-yesno
    git lfs pull --include "*.onnx"
  )
fi

dotnet run \
  --sample-rate=8000 \
  --feat-dim=23 \
  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
  --files ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_0_1_0.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_0_1_1_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_0_1_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_0_0_1.wav \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_1_1_0_1_1_0.wav
... ...
#!/usr/bin/env bash

# Fetch the whisper tiny.en model on first run (clone skips the LFS
# blobs, then only the *.onnx files are pulled), and decode the
# bundled test waves with the dotnet example.

if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
  GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-whisper-tiny.en
  (
    cd sherpa-onnx-whisper-tiny.en
    git lfs pull --include "*.onnx"
  )
fi

dotnet run \
  --num-threads=2 \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
  --files ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav \
  ./sherpa-onnx-whisper-tiny.en/test_wavs/1.wav \
  ./sherpa-onnx-whisper-tiny.en/test_wavs/8k.wav
... ...
... ... @@ -15,13 +15,23 @@ func main() {
log.SetFlags(log.LstdFlags | log.Lmicroseconds)
config := sherpa.OfflineRecognizerConfig{}
config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}
flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the encoder model")
flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the decoder model")
flag.IntVar(&config.FeatConfig.SampleRate, "sample-rate", 16000, "Sample rate of the data used to train the model")
flag.IntVar(&config.FeatConfig.FeatureDim, "feat-dim", 80, "Dimension of the features used to train the model")
flag.StringVar(&config.ModelConfig.Transducer.Encoder, "encoder", "", "Path to the transducer encoder model")
flag.StringVar(&config.ModelConfig.Transducer.Decoder, "decoder", "", "Path to the transducer decoder model")
flag.StringVar(&config.ModelConfig.Transducer.Joiner, "joiner", "", "Path to the joiner model")
flag.StringVar(&config.ModelConfig.Paraformer.Model, "paraformer", "", "Path to the paraformer model")
flag.StringVar(&config.ModelConfig.NemoCTC.Model, "nemo-ctc", "", "Path to the NeMo CTC model")
flag.StringVar(&config.ModelConfig.Whisper.Encoder, "whisper-encoder", "", "Path to the whisper encoder model")
flag.StringVar(&config.ModelConfig.Whisper.Decoder, "whisper-decoder", "", "Path to the whisper decoder model")
flag.StringVar(&config.ModelConfig.Tdnn.Model, "tdnn-model", "", "Path to the tdnn model")
flag.StringVar(&config.ModelConfig.Tokens, "tokens", "", "Path to the tokens file")
flag.IntVar(&config.ModelConfig.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.ModelConfig.Debug, "debug", 0, "Whether to show debug message")
... ...
... ... @@ -5,7 +5,7 @@
# to download the model
# before you run this script.
#
# You can switch to a different online model if you need
# You can switch to a different offline model if you need
./non-streaming-decode-files \
--nemo-ctc ./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \
... ...
... ... @@ -5,7 +5,6 @@
# to download the model
# before you run this script.
#
# You can switch to a different online model if you need
./non-streaming-decode-files \
--paraformer ./sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx \
... ...
#!/usr/bin/env bash

# Decode one yesno test wave with the tdnn model via the Go example.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/yesno/index.html
# to download the model before you run this script.
#
# Note: the named flags are order-independent; only the trailing wave
# path is positional.

./non-streaming-decode-files \
  --tokens=./sherpa-onnx-tdnn-yesno/tokens.txt \
  --tdnn-model=./sherpa-onnx-tdnn-yesno/model-epoch-14-avg-2.onnx \
  --sample-rate=8000 \
  --feat-dim=23 \
  ./sherpa-onnx-tdnn-yesno/test_wavs/0_0_0_1_0_0_0_1.wav
... ...
... ... @@ -5,7 +5,7 @@
# to download the model
# before you run this script.
#
# You can switch to a different online model if you need
# You can switch to a different offline model if you need
./non-streaming-decode-files \
--encoder ./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx \
... ...
#!/usr/bin/env bash

# Decode one test wave with the whisper tiny.en model via the Go example.
#
# Please refer to
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html
# to download the model before you run this script.
#
# You can switch to a different offline model if you need.
# Note: the named flags are order-independent; only the trailing wave
# path is positional.

./non-streaming-decode-files \
  --tokens=./sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt \
  --whisper-encoder=./sherpa-onnx-whisper-tiny.en/tiny.en-encoder.onnx \
  --whisper-decoder=./sherpa-onnx-whisper-tiny.en/tiny.en-decoder.onnx \
  ./sherpa-onnx-whisper-tiny.en/test_wavs/0.wav
... ...
... ... @@ -52,6 +52,32 @@ namespace SherpaOnnx
}
// Configuration for offline whisper models.
// Sequential layout: the field order and LPStr marshaling must mirror the
// native SherpaOnnxOfflineWhisperModelConfig struct (see the C API header),
// because this struct is passed across the P/Invoke boundary.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineWhisperModelConfig
{
  // Parameterless constructor so both paths default to "" (never null)
  // before marshaling.
  public OfflineWhisperModelConfig()
  {
    Encoder = "";
    Decoder = "";
  }

  // Path to the whisper encoder model, e.g., tiny.en-encoder.onnx
  [MarshalAs(UnmanagedType.LPStr)]
  public string Encoder;

  // Path to the whisper decoder model, e.g., tiny.en-decoder.onnx
  [MarshalAs(UnmanagedType.LPStr)]
  public string Decoder;
}
// Configuration for offline tdnn models (e.g., the yesno model).
// Sequential layout: the field order and LPStr marshaling must mirror the
// native SherpaOnnxOfflineTdnnModelConfig struct (see the C API header),
// because this struct is passed across the P/Invoke boundary.
[StructLayout(LayoutKind.Sequential)]
public struct OfflineTdnnModelConfig
{
  // Bug fix: the constructor was mistakenly named
  // OfflineWhisperModelConfig (copy/paste error) — a member whose name
  // differs from the enclosing struct is not a constructor and does not
  // compile. It must bear the struct's own name.
  public OfflineTdnnModelConfig()
  {
    Model = "";
  }

  // Path to the tdnn model, e.g., model-epoch-14-avg-2.onnx
  [MarshalAs(UnmanagedType.LPStr)]
  public string Model;
}
[StructLayout(LayoutKind.Sequential)]
public struct OfflineLMConfig
{
public OfflineLMConfig()
... ... @@ -73,6 +99,8 @@ namespace SherpaOnnx
Transducer = new OfflineTransducerModelConfig();
Paraformer = new OfflineParaformerModelConfig();
NeMoCtc = new OfflineNemoEncDecCtcModelConfig();
Whisper = new OfflineWhisperModelConfig();
Tdnn = new OfflineTdnnModelConfig();
Tokens = "";
NumThreads = 1;
Debug = 0;
... ... @@ -82,6 +110,8 @@ namespace SherpaOnnx
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
public OfflineNemoEncDecCtcModelConfig NeMoCtc;
public OfflineWhisperModelConfig Whisper;
public OfflineTdnnModelConfig Tdnn;
[MarshalAs(UnmanagedType.LPStr)]
public string Tokens;
... ...
... ... @@ -309,6 +309,15 @@ type OfflineNemoEncDecCtcModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
// Configuration for offline whisper models.
// Field names and order mirror the C struct
// SherpaOnnxOfflineWhisperModelConfig that this is copied into.
type OfflineWhisperModelConfig struct {
	Encoder string // Path to the whisper encoder model, e.g., tiny.en-encoder.onnx
	Decoder string // Path to the whisper decoder model, e.g., tiny.en-decoder.onnx
}
// Configuration for offline tdnn models (e.g., the yesno model).
// Field names and order mirror the C struct
// SherpaOnnxOfflineTdnnModelConfig that this is copied into.
type OfflineTdnnModelConfig struct {
	Model string // Path to the tdnn model, e.g., model-epoch-14-avg-2.onnx
}
// Configuration for offline LM.
type OfflineLMConfig struct {
Model string // Path to the model
... ... @@ -319,6 +328,8 @@ type OfflineModelConfig struct {
Transducer OfflineTransducerModelConfig
Paraformer OfflineParaformerModelConfig
NemoCTC OfflineNemoEncDecCtcModelConfig
Whisper OfflineWhisperModelConfig
Tdnn OfflineTdnnModelConfig
Tokens string // Path to tokens.txt
// Number of threads to use for neural network computation
... ... @@ -390,6 +401,15 @@ func NewOfflineRecognizer(config *OfflineRecognizerConfig) *OfflineRecognizer {
c.model_config.nemo_ctc.model = C.CString(config.ModelConfig.NemoCTC.Model)
defer C.free(unsafe.Pointer(c.model_config.nemo_ctc.model))
c.model_config.whisper.encoder = C.CString(config.ModelConfig.Whisper.Encoder)
defer C.free(unsafe.Pointer(c.model_config.whisper.encoder))
c.model_config.whisper.decoder = C.CString(config.ModelConfig.Whisper.Decoder)
defer C.free(unsafe.Pointer(c.model_config.whisper.decoder))
c.model_config.tdnn.decoder = C.CString(config.ModelConfig.Tdnn.Model)
defer C.free(unsafe.Pointer(c.model_config.tdnn.model))
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
defer C.free(unsafe.Pointer(c.model_config.tokens))
... ...
... ... @@ -271,6 +271,9 @@ SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer(
recognizer_config.model_config.whisper.decoder =
SHERPA_ONNX_OR(config->model_config.whisper.decoder, "");
recognizer_config.model_config.tdnn.model =
SHERPA_ONNX_OR(config->model_config.tdnn.model, "");
recognizer_config.model_config.tokens =
SHERPA_ONNX_OR(config->model_config.tokens, "");
recognizer_config.model_config.num_threads =
... ...
... ... @@ -305,6 +305,10 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineWhisperModelConfig {
const char *decoder;
} SherpaOnnxOfflineWhisperModelConfig;
// Configuration for offline tdnn models (e.g., the yesno model).
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTdnnModelConfig {
  const char *model;  // Path to the tdnn model, e.g., model-epoch-14-avg-2.onnx
} SherpaOnnxOfflineTdnnModelConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig {
const char *model;
float scale;
... ... @@ -315,6 +319,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig {
SherpaOnnxOfflineParaformerModelConfig paraformer;
SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc;
SherpaOnnxOfflineWhisperModelConfig whisper;
SherpaOnnxOfflineTdnnModelConfig tdnn;
const char *tokens;
int32_t num_threads;
... ...