Fangjun Kuang
Committed by GitHub

Add keyword spotting for C# (#1105)

@@ -2,7 +2,10 @@ @@ -2,7 +2,10 @@
2 2
3 cd dotnet-examples/ 3 cd dotnet-examples/
4 4
5 -cd ./online-decode-files 5 +cd ./keyword-spotting-from-files
  6 +./run.sh
  7 +
  8 +cd ../online-decode-files
6 ./run-transducer-itn.sh 9 ./run-transducer-itn.sh
7 ./run-zipformer2-ctc.sh 10 ./run-zipformer2-ctc.sh
8 ./run-transducer.sh 11 ./run-transducer.sh
@@ -139,7 +139,7 @@ time $EXE \ @@ -139,7 +139,7 @@ time $EXE \
139 time $EXE \ 139 time $EXE \
140 --tokens=$repo/tokens.txt \ 140 --tokens=$repo/tokens.txt \
141 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ 141 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
142 - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ 142 + --decoder=$repo/decoder-epoch-99-avg-1.onnx \
143 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ 143 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
144 --num-threads=2 \ 144 --num-threads=2 \
145 $repo/test_wavs/0.wav \ 145 $repo/test_wavs/0.wav \
@@ -172,7 +172,7 @@ time $EXE \ @@ -172,7 +172,7 @@ time $EXE \
172 time $EXE \ 172 time $EXE \
173 --tokens=$repo/tokens.txt \ 173 --tokens=$repo/tokens.txt \
174 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ 174 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
175 - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ 175 + --decoder=$repo/decoder-epoch-99-avg-1.onnx \
176 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ 176 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
177 --num-threads=2 \ 177 --num-threads=2 \
178 $repo/test_wavs/0.wav \ 178 $repo/test_wavs/0.wav \
@@ -86,7 +86,7 @@ for wave in ${waves[@]}; do @@ -86,7 +86,7 @@ for wave in ${waves[@]}; do
86 time $EXE \ 86 time $EXE \
87 --tokens=$repo/tokens.txt \ 87 --tokens=$repo/tokens.txt \
88 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ 88 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
89 - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ 89 + --decoder=$repo/decoder-epoch-99-avg-1.onnx \
90 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ 90 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
91 --num-threads=2 \ 91 --num-threads=2 \
92 $wave 92 $wave
@@ -126,7 +126,7 @@ for wave in ${waves[@]}; do @@ -126,7 +126,7 @@ for wave in ${waves[@]}; do
126 time $EXE \ 126 time $EXE \
127 --tokens=$repo/tokens.txt \ 127 --tokens=$repo/tokens.txt \
128 --encoder=$repo/encoder-epoch-11-avg-1.int8.onnx \ 128 --encoder=$repo/encoder-epoch-11-avg-1.int8.onnx \
129 - --decoder=$repo/decoder-epoch-11-avg-1.int8.onnx \ 129 + --decoder=$repo/decoder-epoch-11-avg-1.onnx \
130 --joiner=$repo/joiner-epoch-11-avg-1.int8.onnx \ 130 --joiner=$repo/joiner-epoch-11-avg-1.int8.onnx \
131 --num-threads=2 \ 131 --num-threads=2 \
132 $wave 132 $wave
@@ -168,7 +168,7 @@ for wave in ${waves[@]}; do @@ -168,7 +168,7 @@ for wave in ${waves[@]}; do
168 time $EXE \ 168 time $EXE \
169 --tokens=$repo/tokens.txt \ 169 --tokens=$repo/tokens.txt \
170 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ 170 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
171 - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ 171 + --decoder=$repo/decoder-epoch-99-avg-1.onnx \
172 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ 172 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
173 --num-threads=2 \ 173 --num-threads=2 \
174 $wave 174 $wave
@@ -210,7 +210,7 @@ for wave in ${waves[@]}; do @@ -210,7 +210,7 @@ for wave in ${waves[@]}; do
210 time $EXE \ 210 time $EXE \
211 --tokens=$repo/tokens.txt \ 211 --tokens=$repo/tokens.txt \
212 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ 212 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
213 - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ 213 + --decoder=$repo/decoder-epoch-99-avg-1.onnx \
214 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ 214 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
215 --num-threads=2 \ 215 --num-threads=2 \
216 $wave 216 $wave
@@ -231,7 +231,7 @@ if [ $EXE == "sherpa-onnx-ffmpeg" ]; then @@ -231,7 +231,7 @@ if [ $EXE == "sherpa-onnx-ffmpeg" ]; then
231 time $EXE \ 231 time $EXE \
232 $repo/tokens.txt \ 232 $repo/tokens.txt \
233 $repo/encoder-epoch-99-avg-1.int8.onnx \ 233 $repo/encoder-epoch-99-avg-1.int8.onnx \
234 - $repo/decoder-epoch-99-avg-1.int8.onnx \ 234 + $repo/decoder-epoch-99-avg-1.onnx \
235 $repo/joiner-epoch-99-avg-1.int8.onnx \ 235 $repo/joiner-epoch-99-avg-1.int8.onnx \
236 https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/resolve/main/test_wavs/4.wav \ 236 https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/resolve/main/test_wavs/4.wav \
237 2 237 2
@@ -271,7 +271,7 @@ for wave in ${waves[@]}; do @@ -271,7 +271,7 @@ for wave in ${waves[@]}; do
271 time $EXE \ 271 time $EXE \
272 --tokens=$repo/tokens.txt \ 272 --tokens=$repo/tokens.txt \
273 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \ 273 --encoder=$repo/encoder-epoch-99-avg-1.int8.onnx \
274 - --decoder=$repo/decoder-epoch-99-avg-1.int8.onnx \ 274 + --decoder=$repo/decoder-epoch-99-avg-1.onnx \
275 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \ 275 --joiner=$repo/joiner-epoch-99-avg-1.int8.onnx \
276 --num-threads=2 \ 276 --num-threads=2 \
277 $wave 277 $wave
@@ -125,12 +125,15 @@ for name in ${wenet_models[@]}; do @@ -125,12 +125,15 @@ for name in ${wenet_models[@]}; do
125 repo=$name 125 repo=$name
126 log "Start testing ${repo_url}" 126 log "Start testing ${repo_url}"
127 127
128 - python3 ./python-api-examples/offline-decode-files.py \  
129 - --tokens=$repo/tokens.txt \  
130 - --wenet-ctc=$repo/model.onnx \  
131 - $repo/test_wavs/0.wav \  
132 - $repo/test_wavs/1.wav \  
133 - $repo/test_wavs/8k.wav 128 + if false; then
  129 + # offline wenet ctc models are not supported by onnxruntime >= 1.18
  130 + python3 ./python-api-examples/offline-decode-files.py \
  131 + --tokens=$repo/tokens.txt \
  132 + --wenet-ctc=$repo/model.onnx \
  133 + $repo/test_wavs/0.wav \
  134 + $repo/test_wavs/1.wav \
  135 + $repo/test_wavs/8k.wav
  136 + fi
134 137
135 python3 ./python-api-examples/online-decode-files.py \ 138 python3 ./python-api-examples/online-decode-files.py \
136 --tokens=$repo/tokens.txt \ 139 --tokens=$repo/tokens.txt \
1 ## 1.10.13 1 ## 1.10.13
2 2
3 * Update onnxruntime from 1.17.1 to 1.18.0 3 * Update onnxruntime from 1.17.1 to 1.18.0
  4 +* Add C# API for Keyword spotting
4 5
5 ## 1.10.12 6 ## 1.10.12
6 7
@@ -21,11 +21,6 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") @@ -21,11 +21,6 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
21 cmake_policy(SET CMP0135 NEW) 21 cmake_policy(SET CMP0135 NEW)
22 endif() 22 endif()
23 23
24 -  
25 -  
26 -  
27 -  
28 -  
29 option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF) 24 option(SHERPA_ONNX_ENABLE_PYTHON "Whether to build Python" OFF)
30 option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF) 25 option(SHERPA_ONNX_ENABLE_TESTS "Whether to build tests" OFF)
31 option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" OFF) 26 option(SHERPA_ONNX_ENABLE_CHECK "Whether to build with assert" OFF)
  1 +// Copyright (c) 2024 Xiaomi Corporation
  2 +//
  3 +// This file shows how to do keyword spotting with sherpa-onnx.
  4 +//
  5 +// 1. Download a model from
  6 +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
  7 +//
  8 +// wget https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  9 +// tar xvf sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01.tar.bz2
  10 +//
  11 +// 2. Now run it
  12 +//
  13 +// dotnet run
  14 +
  15 +using SherpaOnnx;
  16 +using System.Collections.Generic;
  17 +using System;
  18 +
/// Demonstrates keyword spotting on a single wave file, three times over:
/// with only the keywords from the keywords file, then with one extra
/// keyword, then with two extra keywords attached to the stream.
class KeywordSpotterDemo
{
    // Directory created by extracting the model archive mentioned in the
    // header comment of this file.
    private const string ModelDir = "./sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01";

    /// Feeds the whole wave followed by the silent tail padding into
    /// `stream`, marks the input as finished, then decodes while the
    /// spotter reports the stream as ready, printing every non-empty
    /// keyword it returns.
    private static void DecodeAndReport(KeywordSpotter kws, OnlineStream stream,
                                        WaveReader wave, float[] tailPadding)
    {
        stream.AcceptWaveform(wave.SampleRate, wave.Samples);
        stream.AcceptWaveform(wave.SampleRate, tailPadding);
        stream.InputFinished();

        while (kws.IsReady(stream))
        {
            kws.Decode(stream);
            var result = kws.GetResult(stream);
            if (result.Keyword != "")
            {
                Console.WriteLine("Detected: {0}", result.Keyword);
            }
        }
    }

    static void Main(string[] args)
    {
        var config = new KeywordSpotterConfig();
        config.FeatConfig.SampleRate = 16000;
        config.FeatConfig.FeatureDim = 80;

        config.ModelConfig.Transducer.Encoder = ModelDir + "/encoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Decoder = ModelDir + "/decoder-epoch-12-avg-2-chunk-16-left-64.onnx";
        config.ModelConfig.Transducer.Joiner = ModelDir + "/joiner-epoch-12-avg-2-chunk-16-left-64.onnx";

        config.ModelConfig.Tokens = ModelDir + "/tokens.txt";
        config.ModelConfig.Provider = "cpu";
        config.ModelConfig.NumThreads = 1;
        config.ModelConfig.Debug = 1;
        config.KeywordsFile = ModelDir + "/test_wavs/test_keywords.txt";

        var kws = new KeywordSpotter(config);

        var filename = ModelDir + "/test_wavs/3.wav";
        WaveReader waveReader = new WaveReader(filename);

        // 0.3 seconds of zero samples appended after the real audio.
        float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];

        Console.WriteLine("----------Use pre-defined keywords----------");
        DecodeAndReport(kws, kws.CreateStream(), waveReader, tailPadding);

        Console.WriteLine("----------Use pre-defined keywords + add a new keyword----------");
        DecodeAndReport(kws, kws.CreateStream("y ǎn y uán @演员"), waveReader, tailPadding);

        Console.WriteLine("----------Use pre-defined keywords + add 2 new keywords----------");

        // Note keywords are separated by /
        DecodeAndReport(kws, kws.CreateStream("y ǎn y uán @演员/zh ī m íng @知名"), waveReader, tailPadding);
    }
}
  99 +
  1 +<Project Sdk="Microsoft.NET.Sdk">
  2 +
  3 + <PropertyGroup>
  4 + <OutputType>Exe</OutputType>
  5 + <TargetFramework>net6.0</TargetFramework>
  6 + <RootNamespace>keyword_spotting_from_files</RootNamespace>
  7 + <ImplicitUsings>enable</ImplicitUsings>
  8 + <Nullable>enable</Nullable>
  9 + </PropertyGroup>
  10 +
  11 + <ItemGroup>
  12 + <ProjectReference Include="..\Common\Common.csproj" />
  13 + </ItemGroup>
  14 +
  15 +</Project>
#!/usr/bin/env bash
#
# Downloads and extracts the keyword-spotting model on first use, then
# builds and runs this example in Release configuration.

set -ex

model=sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01
archive="$model.tar.bz2"

# tokens.txt is used as the marker that the model is already extracted.
if [ ! -f "./$model/tokens.txt" ]; then
  # -f makes curl exit non-zero on an HTTP error instead of saving the
  # error page under the archive name, so the failure is reported at the
  # download step rather than as a confusing tar error.
  curl -fSL -O "https://github.com/k2-fsa/sherpa-onnx/releases/download/kws-models/$archive"
  tar xvf "$archive"
  rm "$archive"
fi

dotnet run -c Release
@@ -22,7 +22,7 @@ fi @@ -22,7 +22,7 @@ fi
22 dotnet run -c Release \ 22 dotnet run -c Release \
23 --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ 23 --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
24 --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ 24 --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
25 - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ 25 + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
26 --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ 26 --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
27 --rule-fsts ./itn_zh_number.fst \ 27 --rule-fsts ./itn_zh_number.fst \
28 --decoding-method greedy_search \ 28 --decoding-method greedy_search \
@@ -14,7 +14,7 @@ fi @@ -14,7 +14,7 @@ fi
14 dotnet run -c Release \ 14 dotnet run -c Release \
15 --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ 15 --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
16 --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ 16 --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
17 - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ 17 + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
18 --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ 18 --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \
19 --decoding-method greedy_search \ 19 --decoding-method greedy_search \
20 --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \ 20 --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \
@@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "vad-non-streaming-asr-paraf @@ -25,6 +25,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "vad-non-streaming-asr-paraf
25 EndProject 25 EndProject
26 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csproj", "{401E963F-E25A-43CE-987D-8DB2D4715756}" 26 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common", "Common\Common.csproj", "{401E963F-E25A-43CE-987D-8DB2D4715756}"
27 EndProject 27 EndProject
  28 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "keyword-spotting-from-files", "keyword-spotting-from-files\keyword-spotting-from-files.csproj", "{A87EDD31-D654-4C9F-AED7-F6F2825659BD}"
  29 +EndProject
28 Global 30 Global
29 GlobalSection(SolutionConfigurationPlatforms) = preSolution 31 GlobalSection(SolutionConfigurationPlatforms) = preSolution
30 Debug|Any CPU = Debug|Any CPU 32 Debug|Any CPU = Debug|Any CPU
@@ -75,6 +77,10 @@ Global @@ -75,6 +77,10 @@ Global
75 {401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.Build.0 = Debug|Any CPU 77 {401E963F-E25A-43CE-987D-8DB2D4715756}.Debug|Any CPU.Build.0 = Debug|Any CPU
76 {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.ActiveCfg = Release|Any CPU 78 {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.ActiveCfg = Release|Any CPU
77 {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.Build.0 = Release|Any CPU 79 {401E963F-E25A-43CE-987D-8DB2D4715756}.Release|Any CPU.Build.0 = Release|Any CPU
  80 + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
  81 + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Debug|Any CPU.Build.0 = Debug|Any CPU
  82 + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.ActiveCfg = Release|Any CPU
  83 + {A87EDD31-D654-4C9F-AED7-F6F2825659BD}.Release|Any CPU.Build.0 = Release|Any CPU
78 EndGlobalSection 84 EndGlobalSection
79 GlobalSection(SolutionProperties) = preSolution 85 GlobalSection(SolutionProperties) = preSolution
80 HideSolutionNode = FALSE 86 HideSolutionNode = FALSE
@@ -18,5 +18,5 @@ fi @@ -18,5 +18,5 @@ fi
18 dotnet run -c Release \ 18 dotnet run -c Release \
19 --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ 19 --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \
20 --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ 20 --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \
21 - --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ 21 + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \
22 --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx 22 --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx
@@ -5,6 +5,7 @@ @@ -5,6 +5,7 @@
5 | Functions | URL | Supported Platforms| 5 | Functions | URL | Supported Platforms|
6 |---|---|---| 6 |---|---|---|
7 |Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, macOS, Windows| 7 |Streaming speech recognition| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/streaming_asr)| Android, macOS, Windows|
  8 +|Speech synthesis| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/flutter-examples/tts)| Android, iOS, Linux, macOS, Windows|
8 9
9 ## Pure dart-examples 10 ## Pure dart-examples
10 11
1 all 1 all
2 macos-arm64 2 macos-arm64
3 macos-x64 3 macos-x64
4 -linux  
5 -windows 4 +linux-x64
  5 +linux-arm64
  6 +windows-arm64
6 windows-x64 7 windows-x64
7 windows-x86 8 windows-x86
8 packages 9 packages
  10 +tmp
  1 +/// Copyright (c) 2024 Xiaomi Corporation
  2 +
  3 +using System;
  4 +using System.Runtime.InteropServices;
  5 +using System.Text;
  6 +
namespace SherpaOnnx
{
    /// Managed snapshot of a native keyword-spotting result.
    ///
    /// The constructor copies the keyword text out of native memory, so the
    /// object does not reference the native result afterwards.
    public class KeywordResult
    {
        /// <param name="handle">
        /// Pointer to a native struct whose first (and only marshalled) field
        /// is a pointer to a NUL-terminated UTF-8 string. A null handle or a
        /// null string pointer yields an empty keyword.
        /// </param>
        public KeywordResult(IntPtr handle)
        {
            if (handle == IntPtr.Zero)
            {
                _keyword = "";
                return;
            }

            Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl));

            // PtrToStringUTF8() requires .net standard 2.1
            // _keyword = Marshal.PtrToStringUTF8(impl.Keyword);
            _keyword = ReadUtf8(impl.Keyword);
        }

        /// Copies a NUL-terminated UTF-8 string out of native memory.
        /// Returns "" when the pointer is null.
        private static String ReadUtf8(IntPtr text)
        {
            if (text == IntPtr.Zero)
            {
                return "";
            }

            // Count bytes up to (not including) the terminating NUL.
            int length = 0;
            while (Marshal.ReadByte(text, length) != 0)
            {
                length += 1;
            }

            byte[] stringBuffer = new byte[length];
            Marshal.Copy(text, stringBuffer, 0, length);
            return Encoding.UTF8.GetString(stringBuffer);
        }

        // Mirrors the layout of the native result struct.
        [StructLayout(LayoutKind.Sequential)]
        struct Impl
        {
            public IntPtr Keyword;
        }

        private String _keyword;

        /// The detected keyword, or "" when nothing was detected.
        public String Keyword => _keyword;
    }
}
  1 +/// Copyright (c) 2024 Xiaomi Corporation (authors: Fangjun Kuang)
  2 +using System;
  3 +using System.Collections.Generic;
  4 +using System.Runtime.InteropServices;
  5 +using System.Text;
  6 +
namespace SherpaOnnx
{
    // please see
    // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code
    // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources

    /// Managed wrapper around a native keyword spotter.
    ///
    /// The native object is created in the constructor and destroyed by
    /// Dispose() or, failing that, by the finalizer.
    public class KeywordSpotter : IDisposable
    {
        /// Creates the native keyword spotter from `config`.
        public KeywordSpotter(KeywordSpotterConfig config)
        {
            IntPtr h = CreateKeywordSpotter(ref config);
            _handle = new HandleRef(this, h);
        }

        /// Creates a stream that uses the spotter's own keywords.
        public OnlineStream CreateStream()
        {
            IntPtr p = CreateKeywordStream(_handle.Handle);
            return new OnlineStream(p);
        }

        /// Creates a stream and passes `keywords` to the native side for it.
        public OnlineStream CreateStream(string keywords)
        {
            byte[] utf8Bytes = Encoding.UTF8.GetBytes(keywords);

            // The native function receives only a pointer (no length), so the
            // buffer must end with a NUL byte; GetBytes() does not append one.
            // A new byte[] is zero-filled, which provides the terminator.
            byte[] utf8BytesWithNull = new byte[utf8Bytes.Length + 1];
            Array.Copy(utf8Bytes, utf8BytesWithNull, utf8Bytes.Length);

            IntPtr p = CreateKeywordStreamWithKeywords(_handle.Handle, utf8BytesWithNull);
            return new OnlineStream(p);
        }

        /// Return true if the passed stream is ready for decoding.
        public bool IsReady(OnlineStream stream)
        {
            return IsReady(_handle.Handle, stream.Handle) != 0;
        }

        /// You have to ensure that IsReady(stream) returns true before
        /// you call this method
        public void Decode(OnlineStream stream)
        {
            Decode(_handle.Handle, stream.Handle);
        }

        // The caller should ensure all passed streams are ready for decoding.
        public void Decode(IEnumerable<OnlineStream> streams)
        {
            // TargetFramework=net20 does not support System.Linq
            // IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray();
            List<IntPtr> list = new List<IntPtr>();
            foreach (OnlineStream s in streams)
            {
                list.Add(s.Handle);
            }

            IntPtr[] ptrs = list.ToArray();
            Decode(_handle.Handle, ptrs, ptrs.Length);
        }

        /// Returns a managed copy of the current result for `stream`.
        public KeywordResult GetResult(OnlineStream stream)
        {
            IntPtr h = GetResult(_handle.Handle, stream.Handle);
            try
            {
                return new KeywordResult(h);
            }
            finally
            {
                // Release the native result even if copying it throws.
                DestroyResult(h);
            }
        }

        /// Destroys the native spotter. Safe to call more than once.
        public void Dispose()
        {
            Cleanup();
            // Prevent the object from being placed on the
            // finalization queue
            System.GC.SuppressFinalize(this);
        }

        ~KeywordSpotter()
        {
            Cleanup();
        }

        private void Cleanup()
        {
            // Skip the native call once the handle has been cleared, so a
            // repeated Dispose() does not call into native code again.
            if (_handle.Handle != IntPtr.Zero)
            {
                DestroyKeywordSpotter(_handle.Handle);
            }

            // Don't permit the handle to be used again.
            _handle = new HandleRef(this, IntPtr.Zero);
        }

        private HandleRef _handle;

        [DllImport(Dll.Filename)]
        private static extern IntPtr CreateKeywordSpotter(ref KeywordSpotterConfig config);

        [DllImport(Dll.Filename)]
        private static extern void DestroyKeywordSpotter(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr CreateKeywordStream(IntPtr handle);

        [DllImport(Dll.Filename)]
        private static extern IntPtr CreateKeywordStreamWithKeywords(IntPtr handle, [MarshalAs(UnmanagedType.LPArray, ArraySubType = UnmanagedType.I1)] byte[] utf8Keywords);

        [DllImport(Dll.Filename, EntryPoint = "IsKeywordStreamReady")]
        private static extern int IsReady(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "DecodeKeywordStream")]
        private static extern void Decode(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleKeywordStreams")]
        private static extern void Decode(IntPtr handle, IntPtr[] streams, int n);

        [DllImport(Dll.Filename, EntryPoint = "GetKeywordResult")]
        private static extern IntPtr GetResult(IntPtr handle, IntPtr stream);

        [DllImport(Dll.Filename, EntryPoint = "DestroyKeywordResult")]
        private static extern void DestroyResult(IntPtr result);
    }
}
  1 +/// Copyright (c) 2024 Xiaomi Corporation
  2 +
  3 +using System.Runtime.InteropServices;
  4 +
namespace SherpaOnnx
{
    /// Settings used to construct a keyword spotter.
    ///
    /// The struct is marshalled with sequential layout, so the declaration
    /// order of the fields below is significant and must not be changed.
    [StructLayout(LayoutKind.Sequential)]
    public struct KeywordSpotterConfig
    {
        public FeatureConfig FeatConfig;
        public OnlineModelConfig ModelConfig;

        public int MaxActivePaths;
        public int NumTrailingBlanks;
        public float KeywordsScore;
        public float KeywordsThreshold;

        [MarshalAs(UnmanagedType.LPStr)]
        public string KeywordsFile;

        /// Fills every field with its default: fresh feature/model configs,
        /// 4 active paths, 1 trailing blank, score 1.0, threshold 0.25 and
        /// an empty keywords-file path.
        public KeywordSpotterConfig()
        {
            FeatConfig = new FeatureConfig();
            ModelConfig = new OnlineModelConfig();

            MaxActivePaths = 4;
            NumTrailingBlanks = 1;

            KeywordsScore = 1.0f;
            KeywordsThreshold = 0.25f;

            KeywordsFile = "";
        }
    }
}
@@ -50,12 +50,12 @@ class TestKeywordSpotter(unittest.TestCase): @@ -50,12 +50,12 @@ class TestKeywordSpotter(unittest.TestCase):
50 for use_int8 in [True, False]: 50 for use_int8 in [True, False]:
51 if use_int8: 51 if use_int8:
52 encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 52 encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
53 - decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 53 + decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
54 joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 54 joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
55 else: 55 else:
56 - encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"  
57 - decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"  
58 - joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 56 + encoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  57 + decoder = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  58 + joiner = f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
59 59
60 tokens = ( 60 tokens = (
61 f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/tokens.txt" 61 f"{d}/sherpa-onnx-kws-zipformer-gigaspeech-3.3M-2024-01-01/tokens.txt"
@@ -109,12 +109,12 @@ class TestKeywordSpotter(unittest.TestCase): @@ -109,12 +109,12 @@ class TestKeywordSpotter(unittest.TestCase):
109 for use_int8 in [True, False]: 109 for use_int8 in [True, False]:
110 if use_int8: 110 if use_int8:
111 encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 111 encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
112 - decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 112 + decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
113 joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 113 joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx"
114 else: 114 else:
115 - encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"  
116 - decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.int8.onnx"  
117 - joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.int8.onnx" 115 + encoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/encoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  116 + decoder = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/decoder-epoch-12-avg-2-chunk-16-left-64.onnx"
  117 + joiner = f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/joiner-epoch-12-avg-2-chunk-16-left-64.onnx"
118 118
119 tokens = ( 119 tokens = (
120 f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt" 120 f"{d}/sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01/tokens.txt"
@@ -52,7 +52,7 @@ class TestOfflineRecognizer(unittest.TestCase): @@ -52,7 +52,7 @@ class TestOfflineRecognizer(unittest.TestCase):
52 for use_int8 in [True, False]: 52 for use_int8 in [True, False]:
53 if use_int8: 53 if use_int8:
54 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx" 54 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx"
55 - decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.int8.onnx" 55 + decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx"
56 joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx" 56 joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx"
57 else: 57 else:
58 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx" 58 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx"
@@ -85,7 +85,7 @@ class TestOfflineRecognizer(unittest.TestCase): @@ -85,7 +85,7 @@ class TestOfflineRecognizer(unittest.TestCase):
85 for use_int8 in [True, False]: 85 for use_int8 in [True, False]:
86 if use_int8: 86 if use_int8:
87 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx" 87 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.int8.onnx"
88 - decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.int8.onnx" 88 + decoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx"
89 joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx" 89 joiner = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.int8.onnx"
90 else: 90 else:
91 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx" 91 encoder = f"{d}/sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx"
@@ -50,7 +50,7 @@ class TestOnlineRecognizer(unittest.TestCase): @@ -50,7 +50,7 @@ class TestOnlineRecognizer(unittest.TestCase):
50 for use_int8 in [True, False]: 50 for use_int8 in [True, False]:
51 if use_int8: 51 if use_int8:
52 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx" 52 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"
53 - decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx" 53 + decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
54 joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx" 54 joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx"
55 else: 55 else:
56 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx" 56 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
@@ -90,7 +90,7 @@ class TestOnlineRecognizer(unittest.TestCase): @@ -90,7 +90,7 @@ class TestOnlineRecognizer(unittest.TestCase):
90 for use_int8 in [True, False]: 90 for use_int8 in [True, False]:
91 if use_int8: 91 if use_int8:
92 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx" 92 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx"
93 - decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx" 93 + decoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
94 joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx" 94 joiner = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx"
95 else: 95 else:
96 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx" 96 encoder = f"{d}/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"