Committed by
GitHub
Add C API for streaming HLG decoding (#734)
Showing 39 changed files with 838 additions and 7 deletions
| @@ -58,6 +58,13 @@ rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | @@ -58,6 +58,13 @@ rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | ||
| 58 | node ./test-online-zipformer2-ctc.js | 58 | node ./test-online-zipformer2-ctc.js |
| 59 | rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 | 59 | rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 |
| 60 | 60 | ||
| 61 | + | ||
| 62 | +curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 63 | +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 64 | +rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 65 | +node ./test-online-zipformer2-ctc-hlg.js | ||
| 66 | +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 | ||
| 67 | + | ||
| 61 | # offline tts | 68 | # offline tts |
| 62 | 69 | ||
| 63 | curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 | 70 | curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2 |
| @@ -7,6 +7,10 @@ echo "pwd: $PWD" | @@ -7,6 +7,10 @@ echo "pwd: $PWD" | ||
| 7 | cd swift-api-examples | 7 | cd swift-api-examples |
| 8 | ls -lh | 8 | ls -lh |
| 9 | 9 | ||
| 10 | +./run-streaming-hlg-decode-file.sh | ||
| 11 | +rm ./streaming-hlg-decode-file | ||
| 12 | +rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 | ||
| 13 | + | ||
| 10 | ./run-spoken-language-identification.sh | 14 | ./run-spoken-language-identification.sh |
| 11 | rm -rf sherpa-onnx-whisper* | 15 | rm -rf sherpa-onnx-whisper* |
| 12 | 16 | ||
| @@ -31,4 +35,5 @@ sed -i.bak '20d' ./decode-file.swift | @@ -31,4 +35,5 @@ sed -i.bak '20d' ./decode-file.swift | ||
| 31 | 35 | ||
| 32 | ./run-decode-file-non-streaming.sh | 36 | ./run-decode-file-non-streaming.sh |
| 33 | 37 | ||
| 38 | + | ||
| 34 | ls -lh | 39 | ls -lh |
| @@ -178,6 +178,7 @@ jobs: | @@ -178,6 +178,7 @@ jobs: | ||
| 178 | cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/ | 178 | cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/ |
| 179 | cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/ | 179 | cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/ |
| 180 | cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/ | 180 | cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/ |
| 181 | + cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding | ||
| 181 | 182 | ||
| 182 | ls -lh /tmp | 183 | ls -lh /tmp |
| 183 | 184 |
| @@ -66,12 +66,77 @@ jobs: | @@ -66,12 +66,77 @@ jobs: | ||
| 66 | run: | | 66 | run: | |
| 67 | gcc --version | 67 | gcc --version |
| 68 | 68 | ||
| 69 | - - name: Test speaker identification | 69 | + - name: Test streaming HLG decoding (Linux/macOS) |
| 70 | + if: matrix.os != 'windows-latest' | ||
| 71 | + shell: bash | ||
| 72 | + run: | | ||
| 73 | + cd go-api-examples/streaming-hlg-decoding/ | ||
| 74 | + ./run.sh | ||
| 75 | + | ||
| 76 | + - name: Test speaker identification (Linux/macOS) | ||
| 77 | + if: matrix.os != 'windows-latest' | ||
| 70 | shell: bash | 78 | shell: bash |
| 71 | run: | | 79 | run: | |
| 72 | cd go-api-examples/speaker-identification | 80 | cd go-api-examples/speaker-identification |
| 73 | ./run.sh | 81 | ./run.sh |
| 74 | 82 | ||
| 83 | + - name: Test speaker identification (Win64) | ||
| 84 | + if: matrix.os == 'windows-latest' && matrix.arch == 'x64' | ||
| 85 | + shell: bash | ||
| 86 | + run: | | ||
| 87 | + cd go-api-examples/speaker-identification | ||
| 88 | + go mod tidy | ||
| 89 | + cat go.mod | ||
| 90 | + go build | ||
| 91 | + | ||
| 92 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx | ||
| 93 | + git clone https://github.com/csukuangfj/sr-data | ||
| 94 | + ls -lh | ||
| 95 | + echo $PWD | ||
| 96 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | ||
| 97 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* | ||
| 98 | + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll . | ||
| 99 | + ls -lh | ||
| 100 | + go mod tidy | ||
| 101 | + go build | ||
| 102 | + go run ./main.go | ||
| 103 | + | ||
| 104 | + - name: Test speaker identification (Win32) | ||
| 105 | + if: matrix.os == 'windows-latest' && matrix.arch == 'x86' | ||
| 106 | + shell: bash | ||
| 107 | + run: | | ||
| 108 | + cd go-api-examples/speaker-identification | ||
| 109 | + go mod tidy | ||
| 110 | + cat go.mod | ||
| 111 | + ls -lh | ||
| 112 | + | ||
| 113 | + go env GOARCH | ||
| 114 | + go env | ||
| 115 | + echo "------------------------------" | ||
| 116 | + go env -w GOARCH=386 | ||
| 117 | + go env -w CGO_ENABLED=1 | ||
| 118 | + go env | ||
| 119 | + | ||
| 120 | + go clean | ||
| 121 | + go build | ||
| 122 | + | ||
| 123 | + echo $PWD | ||
| 124 | + | ||
| 125 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx | ||
| 126 | + git clone https://github.com/csukuangfj/sr-data | ||
| 127 | + ls -lh | ||
| 128 | + echo $PWD | ||
| 129 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/ | ||
| 130 | + ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/* | ||
| 131 | + cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll . | ||
| 132 | + ls -lh | ||
| 133 | + go mod tidy | ||
| 134 | + go build | ||
| 135 | + go run ./main.go | ||
| 136 | + | ||
| 137 | + rm -rf sr-data | ||
| 138 | + rm -rf *.onnx | ||
| 139 | + | ||
| 75 | - name: Test non-streaming TTS (Linux/macOS) | 140 | - name: Test non-streaming TTS (Linux/macOS) |
| 76 | if: matrix.os != 'windows-latest' | 141 | if: matrix.os != 'windows-latest' |
| 77 | shell: bash | 142 | shell: bash |
| @@ -74,6 +74,12 @@ jobs: | @@ -74,6 +74,12 @@ jobs: | ||
| 74 | go mod tidy | 74 | go mod tidy |
| 75 | go build | 75 | go build |
| 76 | 76 | ||
| 77 | + - name: Test streaming HLG decoding | ||
| 78 | + shell: bash | ||
| 79 | + run: | | ||
| 80 | + cd scripts/go/_internal/streaming-hlg-decoding/ | ||
| 81 | + ./run.sh | ||
| 82 | + | ||
| 77 | - name: Test speaker identification | 83 | - name: Test speaker identification |
| 78 | shell: bash | 84 | shell: bash |
| 79 | run: | | 85 | run: | |
| @@ -15,6 +15,9 @@ target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api) | @@ -15,6 +15,9 @@ target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api) | ||
| 15 | add_executable(speaker-identification-c-api speaker-identification-c-api.c) | 15 | add_executable(speaker-identification-c-api speaker-identification-c-api.c) |
| 16 | target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api) | 16 | target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api) |
| 17 | 17 | ||
| 18 | +add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c) | ||
| 19 | +target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api) | ||
| 20 | + | ||
| 18 | if(SHERPA_ONNX_HAS_ALSA) | 21 | if(SHERPA_ONNX_HAS_ALSA) |
| 19 | add_subdirectory(./asr-microphone-example) | 22 | add_subdirectory(./asr-microphone-example) |
| 20 | elseif((UNIX AND NOT APPLE) OR LINUX) | 23 | elseif((UNIX AND NOT APPLE) OR LINUX) |
| 1 | +// c-api-examples/streaming-hlg-decode-file-c-api.c | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 4 | +/* | ||
| 5 | +We use the following model as an example | ||
| 6 | + | ||
| 7 | +// clang-format off | ||
| 8 | + | ||
| 9 | +Download the model from | ||
| 10 | +https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 11 | + | ||
| 12 | +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 13 | +rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 14 | + | ||
| 15 | +build/bin/streaming-hlg-decode-file-c-api | ||
| 16 | + | ||
| 17 | +(The above model is from https://github.com/k2-fsa/icefall/pull/1557) | ||
| 18 | +*/ | ||
| 19 | +#include <stdio.h> | ||
| 20 | +#include <stdlib.h> | ||
| 21 | +#include <string.h> | ||
| 22 | + | ||
| 23 | +#include "sherpa-onnx/c-api/c-api.h" | ||
| 24 | + | ||
| 25 | +int32_t main() { | ||
| 26 | + // clang-format off | ||
| 27 | + // | ||
| 28 | + // Please download the model from | ||
| 29 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 30 | + const char *model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"; | ||
| 31 | + const char *tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"; | ||
| 32 | + const char *graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"; | ||
| 33 | + const char *wav_filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"; | ||
| 34 | + // clang-format on | ||
| 35 | + | ||
| 36 | + SherpaOnnxOnlineRecognizerConfig config; | ||
| 37 | + | ||
| 38 | + memset(&config, 0, sizeof(config)); | ||
| 39 | + config.feat_config.sample_rate = 16000; | ||
| 40 | + config.feat_config.feature_dim = 80; | ||
| 41 | + config.model_config.zipformer2_ctc.model = model; | ||
| 42 | + config.model_config.tokens = tokens; | ||
| 43 | + config.model_config.num_threads = 1; | ||
| 44 | + config.model_config.provider = "cpu"; | ||
| 45 | + config.model_config.debug = 0; | ||
| 46 | + config.ctc_fst_decoder_config.graph = graph; | ||
| 47 | + const SherpaOnnxOnlineRecognizer *recognizer = | ||
| 48 | + CreateOnlineRecognizer(&config); | ||
| 49 | + if (!recognizer) { | ||
| 50 | + fprintf(stderr, "Failed to create recognizer"); | ||
| 51 | + exit(-1); | ||
| 52 | + } | ||
| 53 | + | ||
| 54 | + const SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer); | ||
| 55 | + | ||
| 56 | + const SherpaOnnxDisplay *display = CreateDisplay(50); | ||
| 57 | + int32_t segment_id = 0; | ||
| 58 | + | ||
| 59 | + const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename); | ||
| 60 | + if (wave == NULL) { | ||
| 61 | + fprintf(stderr, "Failed to read %s\n", wav_filename); | ||
| 62 | + exit(-1); | ||
| 63 | + } | ||
| 64 | + | ||
| 65 | +// simulate streaming. You can choose an arbitrary N | ||
| 66 | +#define N 3200 | ||
| 67 | + | ||
| 68 | + int16_t buffer[N]; | ||
| 69 | + float samples[N]; | ||
| 70 | + fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n", | ||
| 71 | + wave->sample_rate, wave->num_samples, | ||
| 72 | + (float)wave->num_samples / wave->sample_rate); | ||
| 73 | + | ||
| 74 | + int32_t k = 0; | ||
| 75 | + while (k < wave->num_samples) { | ||
| 76 | + int32_t start = k; | ||
| 77 | + int32_t end = | ||
| 78 | + (start + N > wave->num_samples) ? wave->num_samples : (start + N); | ||
| 79 | + k += N; | ||
| 80 | + | ||
| 81 | + AcceptWaveform(stream, wave->sample_rate, wave->samples + start, | ||
| 82 | + end - start); | ||
| 83 | + while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 84 | + DecodeOnlineStream(recognizer, stream); | ||
| 85 | + } | ||
| 86 | + | ||
| 87 | + const SherpaOnnxOnlineRecognizerResult *r = | ||
| 88 | + GetOnlineStreamResult(recognizer, stream); | ||
| 89 | + | ||
| 90 | + if (strlen(r->text)) { | ||
| 91 | + SherpaOnnxPrint(display, segment_id, r->text); | ||
| 92 | + } | ||
| 93 | + | ||
| 94 | + if (IsEndpoint(recognizer, stream)) { | ||
| 95 | + if (strlen(r->text)) { | ||
| 96 | + ++segment_id; | ||
| 97 | + } | ||
| 98 | + Reset(recognizer, stream); | ||
| 99 | + } | ||
| 100 | + | ||
| 101 | + DestroyOnlineRecognizerResult(r); | ||
| 102 | + } | ||
| 103 | + | ||
| 104 | + // add some tail padding | ||
| 105 | + float tail_paddings[4800] = {0}; // 0.3 seconds at 16 kHz sample rate | ||
| 106 | + AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800); | ||
| 107 | + | ||
| 108 | + SherpaOnnxFreeWave(wave); | ||
| 109 | + | ||
| 110 | + InputFinished(stream); | ||
| 111 | + while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 112 | + DecodeOnlineStream(recognizer, stream); | ||
| 113 | + } | ||
| 114 | + | ||
| 115 | + const SherpaOnnxOnlineRecognizerResult *r = | ||
| 116 | + GetOnlineStreamResult(recognizer, stream); | ||
| 117 | + | ||
| 118 | + if (strlen(r->text)) { | ||
| 119 | + SherpaOnnxPrint(display, segment_id, r->text); | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + DestroyOnlineRecognizerResult(r); | ||
| 123 | + | ||
| 124 | + DestroyDisplay(display); | ||
| 125 | + DestroyOnlineStream(stream); | ||
| 126 | + DestroyOnlineRecognizer(recognizer); | ||
| 127 | + fprintf(stderr, "\n"); | ||
| 128 | + | ||
| 129 | + return 0; | ||
| 130 | +} |
| @@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline | @@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline | ||
| 15 | EndProject | 15 | EndProject |
| 16 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}" | 16 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}" |
| 17 | EndProject | 17 | EndProject |
| 18 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}" | ||
| 19 | +EndProject | ||
| 18 | Global | 20 | Global |
| 19 | GlobalSection(SolutionConfigurationPlatforms) = preSolution | 21 | GlobalSection(SolutionConfigurationPlatforms) = preSolution |
| 20 | Debug|Any CPU = Debug|Any CPU | 22 | Debug|Any CPU = Debug|Any CPU |
| @@ -48,5 +50,9 @@ Global | @@ -48,5 +50,9 @@ Global | ||
| 48 | {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU | 50 | {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU |
| 49 | {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU | 51 | {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU |
| 50 | {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU | 52 | {3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU |
| 53 | + {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 54 | + {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 55 | + {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 56 | + {C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 51 | EndGlobalSection | 57 | EndGlobalSection |
| 52 | EndGlobal | 58 | EndGlobal |
| 1 | +// Copyright (c) 2024 Xiaomi Corporation | ||
| 2 | +// | ||
| 3 | +// This file shows how to do streaming HLG decoding. | ||
| 4 | +// | ||
| 5 | +// 1. Download the model for testing | ||
| 6 | +// | ||
| 7 | +// curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 8 | +// tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 9 | +// rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 10 | +// | ||
| 11 | +// 2. Now run it | ||
| 12 | +// | ||
| 13 | +// dotnet run | ||
| 14 | + | ||
| 15 | +using SherpaOnnx; | ||
| 16 | +using System.Collections.Generic; | ||
| 17 | +using System; | ||
| 18 | + | ||
| 19 | +class StreamingHlgDecodingDemo | ||
| 20 | +{ | ||
| 21 | + | ||
| 22 | + static void Main(string[] args) | ||
| 23 | + { | ||
| 24 | + var config = new OnlineRecognizerConfig(); | ||
| 25 | + config.FeatConfig.SampleRate = 16000; | ||
| 26 | + config.FeatConfig.FeatureDim = 80; | ||
| 27 | + config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"; | ||
| 28 | + | ||
| 29 | + config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"; | ||
| 30 | + config.ModelConfig.Provider = "cpu"; | ||
| 31 | + config.ModelConfig.NumThreads = 1; | ||
| 32 | + config.ModelConfig.Debug = 0; | ||
| 33 | + config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"; | ||
| 34 | + | ||
| 35 | + OnlineRecognizer recognizer = new OnlineRecognizer(config); | ||
| 36 | + | ||
| 37 | + var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"; | ||
| 38 | + | ||
| 39 | + WaveReader waveReader = new WaveReader(filename); | ||
| 40 | + OnlineStream s = recognizer.CreateStream(); | ||
| 41 | + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | ||
| 42 | + | ||
| 43 | + float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; | ||
| 44 | + s.AcceptWaveform(waveReader.SampleRate, tailPadding); | ||
| 45 | + s.InputFinished(); | ||
| 46 | + | ||
| 47 | + while (recognizer.IsReady(s)) | ||
| 48 | + { | ||
| 49 | + recognizer.Decode(s); | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + OnlineRecognizerResult r = recognizer.GetResult(s); | ||
| 53 | + var text = r.Text; | ||
| 54 | + var tokens = r.Tokens; | ||
| 55 | + Console.WriteLine("--------------------"); | ||
| 56 | + Console.WriteLine(filename); | ||
| 57 | + Console.WriteLine("text: {0}", text); | ||
| 58 | + Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens)); | ||
| 59 | + Console.Write("timestamps: ["); | ||
| 60 | + r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", ")); | ||
| 61 | + Console.WriteLine("]"); | ||
| 62 | + Console.WriteLine("--------------------"); | ||
| 63 | + } | ||
| 64 | +} | ||
| 65 | + | ||
| 66 | + |
| 1 | +../online-decode-files/WaveReader.cs |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 8 | + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +dotnet run -c Release |
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | + | ||
| 3 | + <PropertyGroup> | ||
| 4 | + <OutputType>Exe</OutputType> | ||
| 5 | + <TargetFramework>net6.0</TargetFramework> | ||
| 6 | + <RootNamespace>streaming_hlg_decoding</RootNamespace> | ||
| 7 | + <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | + <Nullable>enable</Nullable> | ||
| 9 | + </PropertyGroup> | ||
| 10 | + | ||
| 11 | + <ItemGroup> | ||
| 12 | + <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" /> | ||
| 13 | + </ItemGroup> | ||
| 14 | + | ||
| 15 | +</Project> |
| 1 | +package main | ||
| 2 | + | ||
| 3 | +import ( | ||
| 4 | + "bytes" | ||
| 5 | + "encoding/binary" | ||
| 6 | + sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx" | ||
| 7 | + "github.com/youpy/go-wav" | ||
| 8 | + "log" | ||
| 9 | + "os" | ||
| 10 | + "strings" | ||
| 11 | +) | ||
| 12 | + | ||
| 13 | +func main() { | ||
| 14 | + log.SetFlags(log.LstdFlags | log.Lmicroseconds) | ||
| 15 | + | ||
| 16 | + config := sherpa.OnlineRecognizerConfig{} | ||
| 17 | + config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80} | ||
| 18 | + | ||
| 19 | + // please download model files from | ||
| 20 | + // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 21 | + config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx" | ||
| 22 | + config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt" | ||
| 23 | + | ||
| 24 | + config.ModelConfig.NumThreads = 1 | ||
| 25 | + config.ModelConfig.Debug = 0 | ||
| 26 | + config.ModelConfig.Provider = "cpu" | ||
| 27 | + config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst" | ||
| 28 | + | ||
| 29 | + wav_filename := "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav" | ||
| 30 | + | ||
| 31 | + samples, sampleRate := readWave(wav_filename) | ||
| 32 | + | ||
| 33 | + log.Println("Initializing recognizer (may take several seconds)") | ||
| 34 | + recognizer := sherpa.NewOnlineRecognizer(&config) | ||
| 35 | + log.Println("Recognizer created!") | ||
| 36 | + defer sherpa.DeleteOnlineRecognizer(recognizer) | ||
| 37 | + | ||
| 38 | + log.Println("Start decoding!") | ||
| 39 | + stream := sherpa.NewOnlineStream(recognizer) | ||
| 40 | + defer sherpa.DeleteOnlineStream(stream) | ||
| 41 | + | ||
| 42 | + stream.AcceptWaveform(sampleRate, samples) | ||
| 43 | + | ||
| 44 | + tailPadding := make([]float32, int(float32(sampleRate)*0.3)) | ||
| 45 | + stream.AcceptWaveform(sampleRate, tailPadding) | ||
| 46 | + | ||
| 47 | + for recognizer.IsReady(stream) { | ||
| 48 | + recognizer.Decode(stream) | ||
| 49 | + } | ||
| 50 | + log.Println("Decoding done!") | ||
| 51 | + result := recognizer.GetResult(stream) | ||
| 52 | + log.Println(strings.ToLower(result.Text)) | ||
| 53 | + log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate)) | ||
| 54 | +} | ||
| 55 | + | ||
| 56 | +func readWave(filename string) (samples []float32, sampleRate int) { | ||
| 57 | + file, _ := os.Open(filename) | ||
| 58 | + defer file.Close() | ||
| 59 | + | ||
| 60 | + reader := wav.NewReader(file) | ||
| 61 | + format, err := reader.Format() | ||
| 62 | + if err != nil { | ||
| 63 | + log.Fatalf("Failed to read wave format") | ||
| 64 | + } | ||
| 65 | + | ||
| 66 | + if format.AudioFormat != 1 { | ||
| 67 | + log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat) | ||
| 68 | + } | ||
| 69 | + | ||
| 70 | + if format.NumChannels != 1 { | ||
| 71 | + log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels) | ||
| 72 | + } | ||
| 73 | + | ||
| 74 | + if format.BitsPerSample != 16 { | ||
| 75 | + log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample) | ||
| 76 | + } | ||
| 77 | + | ||
| 78 | + reader.Duration() // so that it initializes reader.Size | ||
| 79 | + | ||
| 80 | + buf := make([]byte, reader.Size) | ||
| 81 | + n, err := reader.Read(buf) | ||
| 82 | + if n != int(reader.Size) { | ||
| 83 | + log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n) | ||
| 84 | + } | ||
| 85 | + | ||
| 86 | + samples = samplesInt16ToFloat(buf) | ||
| 87 | + sampleRate = int(format.SampleRate) | ||
| 88 | + | ||
| 89 | + return | ||
| 90 | +} | ||
| 91 | + | ||
| 92 | +func samplesInt16ToFloat(inSamples []byte) []float32 { | ||
| 93 | + numSamples := len(inSamples) / 2 | ||
| 94 | + outSamples := make([]float32, numSamples) | ||
| 95 | + | ||
| 96 | + for i := 0; i != numSamples; i++ { | ||
| 97 | + s := inSamples[i*2 : (i+1)*2] | ||
| 98 | + | ||
| 99 | + var s16 int16 | ||
| 100 | + buf := bytes.NewReader(s) | ||
| 101 | + err := binary.Read(buf, binary.LittleEndian, &s16) | ||
| 102 | + if err != nil { | ||
| 103 | + log.Fatal("Failed to parse 16-bit sample") | ||
| 104 | + } | ||
| 105 | + outSamples[i] = float32(s16) / 32768 | ||
| 106 | + } | ||
| 107 | + | ||
| 108 | + return outSamples | ||
| 109 | +} |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +set -ex | ||
| 4 | + | ||
| 5 | +if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then | ||
| 6 | + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 7 | + tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 8 | + rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 9 | +fi | ||
| 10 | + | ||
| 11 | +go mod tidy | ||
| 12 | +go build | ||
| 13 | +ls -lh | ||
| 14 | +./streaming-hlg-decoding |
| @@ -174,3 +174,16 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp | @@ -174,3 +174,16 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp | ||
| 174 | tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 | 174 | tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2 |
| 175 | node ./test-online-zipformer2-ctc.js | 175 | node ./test-online-zipformer2-ctc.js |
| 176 | ``` | 176 | ``` |
| 177 | + | ||
| 178 | +## ./test-online-zipformer2-ctc-hlg.js | ||
| 179 | +[./test-online-zipformer2-ctc-hlg.js](./test-online-zipformer2-ctc-hlg.js) demonstrates | ||
| 180 | +how to decode a file using a streaming zipformer2 CTC model with HLG. In the code | ||
| 181 | +we use [sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2). | ||
| 182 | + | ||
| 183 | +You can use the following command to run it: | ||
| 184 | + | ||
| 185 | +```bash | ||
| 186 | +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 187 | +tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2 | ||
| 188 | +node ./test-online-zipformer2-ctc-hlg.js | ||
| 189 | +``` |
| @@ -50,6 +50,10 @@ function createOnlineRecognizer() { | @@ -50,6 +50,10 @@ function createOnlineRecognizer() { | ||
| 50 | rule3MinUtteranceLength: 20, | 50 | rule3MinUtteranceLength: 20, |
| 51 | hotwordsFile: '', | 51 | hotwordsFile: '', |
| 52 | hotwordsScore: 1.5, | 52 | hotwordsScore: 1.5, |
| 53 | + ctcFstDecoderConfig: { | ||
| 54 | + graph: '', | ||
| 55 | + maxActive: 3000, | ||
| 56 | + } | ||
| 53 | }; | 57 | }; |
| 54 | 58 | ||
| 55 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 59 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -51,6 +51,10 @@ function createOnlineRecognizer() { | @@ -51,6 +51,10 @@ function createOnlineRecognizer() { | ||
| 51 | rule3MinUtteranceLength: 20, | 51 | rule3MinUtteranceLength: 20, |
| 52 | hotwordsFile: '', | 52 | hotwordsFile: '', |
| 53 | hotwordsScore: 1.5, | 53 | hotwordsScore: 1.5, |
| 54 | + ctcFstDecoderConfig: { | ||
| 55 | + graph: '', | ||
| 56 | + maxActive: 3000, | ||
| 57 | + } | ||
| 54 | }; | 58 | }; |
| 55 | 59 | ||
| 56 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 60 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -52,6 +52,10 @@ function createOnlineRecognizer() { | @@ -52,6 +52,10 @@ function createOnlineRecognizer() { | ||
| 52 | rule3MinUtteranceLength: 20, | 52 | rule3MinUtteranceLength: 20, |
| 53 | hotwordsFile: '', | 53 | hotwordsFile: '', |
| 54 | hotwordsScore: 1.5, | 54 | hotwordsScore: 1.5, |
| 55 | + ctcFstDecoderConfig: { | ||
| 56 | + graph: '', | ||
| 57 | + maxActive: 3000, | ||
| 58 | + } | ||
| 55 | }; | 59 | }; |
| 56 | 60 | ||
| 57 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 61 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| @@ -53,6 +53,10 @@ function createOnlineRecognizer() { | @@ -53,6 +53,10 @@ function createOnlineRecognizer() { | ||
| 53 | rule3MinUtteranceLength: 20, | 53 | rule3MinUtteranceLength: 20, |
| 54 | hotwordsFile: '', | 54 | hotwordsFile: '', |
| 55 | hotwordsScore: 1.5, | 55 | hotwordsScore: 1.5, |
| 56 | + ctcFstDecoderConfig: { | ||
| 57 | + graph: '', | ||
| 58 | + maxActive: 3000, | ||
| 59 | + } | ||
| 56 | }; | 60 | }; |
| 57 | 61 | ||
| 58 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 62 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +// | ||
| 3 | +const fs = require('fs'); | ||
| 4 | +const {Readable} = require('stream'); | ||
| 5 | +const wav = require('wav'); | ||
| 6 | + | ||
| 7 | +const sherpa_onnx = require('sherpa-onnx'); | ||
| 8 | + | ||
| 9 | +function createOnlineRecognizer() { | ||
| 10 | + let onlineTransducerModelConfig = { | ||
| 11 | + encoder: '', | ||
| 12 | + decoder: '', | ||
| 13 | + joiner: '', | ||
| 14 | + }; | ||
| 15 | + | ||
| 16 | + let onlineParaformerModelConfig = { | ||
| 17 | + encoder: '', | ||
| 18 | + decoder: '', | ||
| 19 | + }; | ||
| 20 | + | ||
| 21 | + let onlineZipformer2CtcModelConfig = { | ||
| 22 | + model: | ||
| 23 | + './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx', | ||
| 24 | + }; | ||
| 25 | + | ||
| 26 | + let onlineModelConfig = { | ||
| 27 | + transducer: onlineTransducerModelConfig, | ||
| 28 | + paraformer: onlineParaformerModelConfig, | ||
| 29 | + zipformer2Ctc: onlineZipformer2CtcModelConfig, | ||
| 30 | + tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt', | ||
| 31 | + numThreads: 1, | ||
| 32 | + provider: 'cpu', | ||
| 33 | + debug: 0, | ||
| 34 | + modelType: '', | ||
| 35 | + }; | ||
| 36 | + | ||
| 37 | + let featureConfig = { | ||
| 38 | + sampleRate: 16000, | ||
| 39 | + featureDim: 80, | ||
| 40 | + }; | ||
| 41 | + | ||
| 42 | + let recognizerConfig = { | ||
| 43 | + featConfig: featureConfig, | ||
| 44 | + modelConfig: onlineModelConfig, | ||
| 45 | + decodingMethod: 'greedy_search', | ||
| 46 | + maxActivePaths: 4, | ||
| 47 | + enableEndpoint: 1, | ||
| 48 | + rule1MinTrailingSilence: 2.4, | ||
| 49 | + rule2MinTrailingSilence: 1.2, | ||
| 50 | + rule3MinUtteranceLength: 20, | ||
| 51 | + hotwordsFile: '', | ||
| 52 | + hotwordsScore: 1.5, | ||
| 53 | + ctcFstDecoderConfig: { | ||
| 54 | + graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst', | ||
| 55 | + maxActive: 3000, | ||
| 56 | + } | ||
| 57 | + }; | ||
| 58 | + | ||
| 59 | + return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | ||
| 60 | +} | ||
| 61 | + | ||
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

// Feed samples into the stream, run the decoder as far as it can go,
// and print the current (partial) recognition result.
function decode(samples) {
  stream.acceptWaveform(gSampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }
  const text = recognizer.getResult(stream);
  console.log(text);
}

// Actual sample rate of the input file; overwritten from the WAV header
// in the 'format' handler below.
let gSampleRate = 16000;

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  gSampleRate = sampleRate;

  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }

  if (channels != 1) {
    // Fix: the original interpolated the undefined identifier `channel`,
    // which would raise a ReferenceError instead of the intended message.
    throw new Error(`Only a single channel. Given ${channels}`);
  }

  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function(err) {
      // tail padding: 0.5 seconds of silence to flush the final frames
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    // Reinterpret the raw byte chunk as 16-bit PCM samples.
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    // Normalize to float32 in [-1, 1) as expected by acceptWaveform().
    const floatSamples = new Float32Array(int16Samples.length);

    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});
| @@ -51,6 +51,10 @@ function createOnlineRecognizer() { | @@ -51,6 +51,10 @@ function createOnlineRecognizer() { | ||
| 51 | rule3MinUtteranceLength: 20, | 51 | rule3MinUtteranceLength: 20, |
| 52 | hotwordsFile: '', | 52 | hotwordsFile: '', |
| 53 | hotwordsScore: 1.5, | 53 | hotwordsScore: 1.5, |
| 54 | + ctcFstDecoderConfig: { | ||
| 55 | + graph: '', | ||
| 56 | + maxActive: 3000, | ||
| 57 | + } | ||
| 54 | }; | 58 | }; |
| 55 | 59 | ||
| 56 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); | 60 | return sherpa_onnx.createOnlineRecognizer(recognizerConfig); |
<Project Sdk="Microsoft.NET.Sdk">

  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <RootNamespace>streaming_hlg_decoding</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <!-- Resolve locally built packages from /tmp/packages first,
       falling back to the public nuget.org feed. -->
  <PropertyGroup>
    <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
| @@ -117,6 +117,21 @@ namespace SherpaOnnx | @@ -117,6 +117,21 @@ namespace SherpaOnnx | ||
| 117 | } | 117 | } |
| 118 | 118 | ||
  /// Configuration for online CTC decoding with an FST (e.g., HLG) graph.
  /// Mirrors SherpaOnnxOnlineCtcFstDecoderConfig in the C API; field order
  /// must match the native struct exactly (sequential layout).
  [StructLayout(LayoutKind.Sequential)]
  public struct OnlineCtcFstDecoderConfig
  {
    public OnlineCtcFstDecoderConfig()
    {
      Graph = "";
      MaxActive = 3000;
    }

    /// Path to the decoding graph (HLG.fst). An empty string disables
    /// FST decoding.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Graph;

    /// Upper bound on the number of active decoder states kept during search.
    public int MaxActive;
  }
| 133 | + | ||
| 134 | + [StructLayout(LayoutKind.Sequential)] | ||
| 120 | public struct OnlineRecognizerConfig | 135 | public struct OnlineRecognizerConfig |
| 121 | { | 136 | { |
| 122 | public OnlineRecognizerConfig() | 137 | public OnlineRecognizerConfig() |
| @@ -131,6 +146,7 @@ namespace SherpaOnnx | @@ -131,6 +146,7 @@ namespace SherpaOnnx | ||
| 131 | Rule3MinUtteranceLength = 20.0F; | 146 | Rule3MinUtteranceLength = 20.0F; |
| 132 | HotwordsFile = ""; | 147 | HotwordsFile = ""; |
| 133 | HotwordsScore = 1.5F; | 148 | HotwordsScore = 1.5F; |
| 149 | + CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig(); | ||
| 134 | } | 150 | } |
| 135 | public FeatureConfig FeatConfig; | 151 | public FeatureConfig FeatConfig; |
| 136 | public OnlineModelConfig ModelConfig; | 152 | public OnlineModelConfig ModelConfig; |
| @@ -167,6 +183,8 @@ namespace SherpaOnnx | @@ -167,6 +183,8 @@ namespace SherpaOnnx | ||
| 167 | 183 | ||
| 168 | /// Bonus score for each token in hotwords. | 184 | /// Bonus score for each token in hotwords. |
| 169 | public float HotwordsScore; | 185 | public float HotwordsScore; |
| 186 | + | ||
| 187 | + public OnlineCtcFstDecoderConfig CtcFstDecoderConfig; | ||
| 170 | } | 188 | } |
| 171 | 189 | ||
| 172 | public class OnlineRecognizerResult | 190 | public class OnlineRecognizerResult |
| 1 | +streaming-hlg-decoding |
| 1 | +../../../../go-api-examples/streaming-hlg-decoding/main.go |
| 1 | +../../../../go-api-examples/streaming-hlg-decoding/run.sh |
| @@ -99,6 +99,11 @@ type FeatureConfig struct { | @@ -99,6 +99,11 @@ type FeatureConfig struct { | ||
| 99 | FeatureDim int | 99 | FeatureDim int |
| 100 | } | 100 | } |
| 101 | 101 | ||
// OnlineCtcFstDecoderConfig configures online CTC decoding with an FST
// (e.g., HLG) graph. It mirrors SherpaOnnxOnlineCtcFstDecoderConfig in
// the C API; see NewOnlineRecognizer for how it is copied into C.
type OnlineCtcFstDecoderConfig struct {
	// Path to the decoding graph (HLG.fst). Empty disables FST decoding.
	Graph string
	// Upper bound on the number of active decoder states kept during search.
	MaxActive int
}
| 106 | + | ||
| 102 | // Configuration for the online/streaming recognizer. | 107 | // Configuration for the online/streaming recognizer. |
| 103 | type OnlineRecognizerConfig struct { | 108 | type OnlineRecognizerConfig struct { |
| 104 | FeatConfig FeatureConfig | 109 | FeatConfig FeatureConfig |
| @@ -120,6 +125,7 @@ type OnlineRecognizerConfig struct { | @@ -120,6 +125,7 @@ type OnlineRecognizerConfig struct { | ||
| 120 | Rule1MinTrailingSilence float32 | 125 | Rule1MinTrailingSilence float32 |
| 121 | Rule2MinTrailingSilence float32 | 126 | Rule2MinTrailingSilence float32 |
| 122 | Rule3MinUtteranceLength float32 | 127 | Rule3MinUtteranceLength float32 |
| 128 | + CtcFstDecoderConfig OnlineCtcFstDecoderConfig | ||
| 123 | } | 129 | } |
| 124 | 130 | ||
| 125 | // It contains the recognition result for a online stream. | 131 | // It contains the recognition result for a online stream. |
| @@ -190,6 +196,10 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | @@ -190,6 +196,10 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer { | ||
| 190 | c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence) | 196 | c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence) |
| 191 | c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength) | 197 | c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength) |
| 192 | 198 | ||
| 199 | + c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph) | ||
| 200 | + defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph)) | ||
| 201 | + c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive) | ||
| 202 | + | ||
| 193 | recognizer := &OnlineRecognizer{} | 203 | recognizer := &OnlineRecognizer{} |
| 194 | recognizer.impl = C.CreateOnlineRecognizer(&c) | 204 | recognizer.impl = C.CreateOnlineRecognizer(&c) |
| 195 | 205 |
| @@ -99,6 +99,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | @@ -99,6 +99,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | ||
| 99 | recognizer_config.hotwords_score = | 99 | recognizer_config.hotwords_score = |
| 100 | SHERPA_ONNX_OR(config->hotwords_score, 1.5); | 100 | SHERPA_ONNX_OR(config->hotwords_score, 1.5); |
| 101 | 101 | ||
| 102 | + recognizer_config.ctc_fst_decoder_config.graph = | ||
| 103 | + SHERPA_ONNX_OR(config->ctc_fst_decoder_config.graph, ""); | ||
| 104 | + recognizer_config.ctc_fst_decoder_config.max_active = | ||
| 105 | + SHERPA_ONNX_OR(config->ctc_fst_decoder_config.max_active, 3000); | ||
| 106 | + | ||
| 102 | if (config->model_config.debug) { | 107 | if (config->model_config.debug) { |
| 103 | SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); | 108 | SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str()); |
| 104 | } | 109 | } |
| @@ -96,6 +96,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { | @@ -96,6 +96,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { | ||
| 96 | int32_t feature_dim; | 96 | int32_t feature_dim; |
| 97 | } SherpaOnnxFeatureConfig; | 97 | } SherpaOnnxFeatureConfig; |
| 98 | 98 | ||
/// Configuration for online CTC decoding with an FST (e.g., HLG) graph.
/// Note: language bindings replicate this layout field-for-field, so the
/// member order and sizes are part of the ABI.
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineCtcFstDecoderConfig {
  /// Path to the decoding graph (HLG.fst). An empty string disables
  /// FST decoding.
  const char *graph;
  /// Upper bound on the number of active decoder states kept during search.
  int32_t max_active;
} SherpaOnnxOnlineCtcFstDecoderConfig;
| 103 | + | ||
| 99 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { | 104 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { |
| 100 | SherpaOnnxFeatureConfig feat_config; | 105 | SherpaOnnxFeatureConfig feat_config; |
| 101 | SherpaOnnxOnlineModelConfig model_config; | 106 | SherpaOnnxOnlineModelConfig model_config; |
| @@ -131,6 +136,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { | @@ -131,6 +136,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { | ||
| 131 | 136 | ||
| 132 | /// Bonus score for each token in hotwords. | 137 | /// Bonus score for each token in hotwords. |
| 133 | float hotwords_score; | 138 | float hotwords_score; |
| 139 | + | ||
| 140 | + SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config; | ||
| 134 | } SherpaOnnxOnlineRecognizerConfig; | 141 | } SherpaOnnxOnlineRecognizerConfig; |
| 135 | 142 | ||
| 136 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { | 143 | SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { |
| @@ -111,6 +111,15 @@ func sherpaOnnxFeatureConfig( | @@ -111,6 +111,15 @@ func sherpaOnnxFeatureConfig( | ||
| 111 | feature_dim: Int32(featureDim)) | 111 | feature_dim: Int32(featureDim)) |
| 112 | } | 112 | } |
| 113 | 113 | ||
/// Build a `SherpaOnnxOnlineCtcFstDecoderConfig` for streaming CTC/HLG
/// decoding.
///
/// - Parameters:
///   - graph: Path to the decoding graph (HLG.fst); an empty string
///     disables FST decoding.
///   - maxActive: Upper bound on active decoder states kept during search.
func sherpaOnnxOnlineCtcFstDecoderConfig(
  graph: String = "",
  maxActive: Int = 3000
) -> SherpaOnnxOnlineCtcFstDecoderConfig {
  let config = SherpaOnnxOnlineCtcFstDecoderConfig(
    graph: toCPointer(graph),
    max_active: Int32(maxActive))
  return config
}
| 122 | + | ||
| 114 | func sherpaOnnxOnlineRecognizerConfig( | 123 | func sherpaOnnxOnlineRecognizerConfig( |
| 115 | featConfig: SherpaOnnxFeatureConfig, | 124 | featConfig: SherpaOnnxFeatureConfig, |
| 116 | modelConfig: SherpaOnnxOnlineModelConfig, | 125 | modelConfig: SherpaOnnxOnlineModelConfig, |
| @@ -121,7 +130,8 @@ func sherpaOnnxOnlineRecognizerConfig( | @@ -121,7 +130,8 @@ func sherpaOnnxOnlineRecognizerConfig( | ||
| 121 | decodingMethod: String = "greedy_search", | 130 | decodingMethod: String = "greedy_search", |
| 122 | maxActivePaths: Int = 4, | 131 | maxActivePaths: Int = 4, |
| 123 | hotwordsFile: String = "", | 132 | hotwordsFile: String = "", |
| 124 | - hotwordsScore: Float = 1.5 | 133 | + hotwordsScore: Float = 1.5, |
| 134 | + ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig() | ||
| 125 | ) -> SherpaOnnxOnlineRecognizerConfig { | 135 | ) -> SherpaOnnxOnlineRecognizerConfig { |
| 126 | return SherpaOnnxOnlineRecognizerConfig( | 136 | return SherpaOnnxOnlineRecognizerConfig( |
| 127 | feat_config: featConfig, | 137 | feat_config: featConfig, |
| @@ -133,7 +143,9 @@ func sherpaOnnxOnlineRecognizerConfig( | @@ -133,7 +143,9 @@ func sherpaOnnxOnlineRecognizerConfig( | ||
| 133 | rule2_min_trailing_silence: rule2MinTrailingSilence, | 143 | rule2_min_trailing_silence: rule2MinTrailingSilence, |
| 134 | rule3_min_utterance_length: rule3MinUtteranceLength, | 144 | rule3_min_utterance_length: rule3MinUtteranceLength, |
| 135 | hotwords_file: toCPointer(hotwordsFile), | 145 | hotwords_file: toCPointer(hotwordsFile), |
| 136 | - hotwords_score: hotwordsScore) | 146 | + hotwords_score: hotwordsScore, |
| 147 | + ctc_fst_decoder_config: ctcFstDecoderConfig | ||
| 148 | + ) | ||
| 137 | } | 149 | } |
| 138 | 150 | ||
| 139 | /// Wrapper for recognition result. | 151 | /// Wrapper for recognition result. |
#!/usr/bin/env bash
# Build (if needed) and run the streaming HLG decoding Swift example on macOS.
# Prerequisite: ../build-swift-macos.sh must have produced the sherpa-onnx
# libraries and headers under ../build-swift-macos/install.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Download the pre-trained streaming zipformer CTC model and its HLG graph
# on first run; subsequent runs reuse the extracted directory.
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
  echo "Downloading the pre-trained model for testing."

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

# Compile the example binary only if it does not exist yet.
if [ ! -e ./streaming-hlg-decode-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./streaming-hlg-decode-file.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o streaming-hlg-decode-file

  strip ./streaming-hlg-decode-file
else
  echo "./streaming-hlg-decode-file exists - skip building"
fi

# Make the sherpa-onnx and onnxruntime dylibs discoverable at runtime.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
./streaming-hlg-decode-file
import AVFoundation

extension AudioBuffer {
  // Copy the raw audio buffer contents into a Swift [Float] array.
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  // Flatten the first channel of the PCM buffer into [Float] samples.
  // The caller asserts mono float32 input, so one buffer is sufficient.
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}

// Decode a single wave file with streaming CTC + HLG (FST) decoding and
// print the final recognition result.
func run() {
  let filePath =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
  let model =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
  let tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"
  let zipfomer2CtcModelConfig = sherpaOnnxOnlineZipformer2CtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    zipformer2Ctc: zipfomer2CtcModelConfig
  )

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  // Supplying a graph path enables CTC FST (HLG) decoding.
  let ctcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(
    graph: "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst",
    maxActive: 3000
  )

  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    ctcFstDecoderConfig: ctcFstDecoderConfig
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  // This example only supports mono float32 PCM input.
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  // Read the whole file into one buffer and feed it to the recognizer.
  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  recognizer.acceptWaveform(samples: array, sampleRate: Int(audioFormat.sampleRate))

  // Tail padding (silence) flushes the last frames through the model.
  let tailPadding = [Float](repeating: 0.0, count: 3200)
  recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate))

  recognizer.inputFinished()
  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
}

@main
struct App {
  static func main() {
    run()
  }
}
| @@ -43,6 +43,10 @@ function freeConfig(config, Module) { | @@ -43,6 +43,10 @@ function freeConfig(config, Module) { | ||
| 43 | freeConfig(config.lm, Module) | 43 | freeConfig(config.lm, Module) |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | + if ('ctcFstDecoder' in config) { | ||
| 47 | + freeConfig(config.ctcFstDecoder, Module) | ||
| 48 | + } | ||
| 49 | + | ||
| 46 | Module._free(config.ptr); | 50 | Module._free(config.ptr); |
| 47 | } | 51 | } |
| 48 | 52 | ||
| @@ -193,11 +197,26 @@ function initSherpaOnnxFeatureConfig(config, Module) { | @@ -193,11 +197,26 @@ function initSherpaOnnxFeatureConfig(config, Module) { | ||
| 193 | return {ptr: ptr, len: len}; | 197 | return {ptr: ptr, len: len}; |
| 194 | } | 198 | } |
| 195 | 199 | ||
// Serialize `config` ({graph, maxActive}) into WASM heap memory, laid out
// as the C struct:
//   struct SherpaOnnxOnlineCtcFstDecoderConfig { const char *graph;
//                                                int32_t max_active; };
// Returns {ptr, len, buffer}: `ptr` is the struct, `buffer` is the
// separately-allocated C string for `graph`.
// NOTE(review): `buffer` is a second heap allocation — verify that
// freeConfig() also releases it, not just `ptr`.
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
  const len = 2 * 4;  // 2 fields x 4 bytes (wasm32: pointers are 32-bit)
  const ptr = Module._malloc(len);

  const graphLen = Module.lengthBytesUTF8(config.graph) + 1;  // +1 for NUL
  const buffer = Module._malloc(graphLen);
  Module.stringToUTF8(config.graph, buffer, graphLen);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.maxActive, 'i32');
  return {ptr: ptr, len: len, buffer: buffer};
}
| 212 | + | ||
| 196 | function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | 213 | function initSherpaOnnxOnlineRecognizerConfig(config, Module) { |
| 197 | const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); | 214 | const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module); |
| 198 | const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module); | 215 | const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module); |
| 216 | + const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig( | ||
| 217 | + config.ctcFstDecoderConfig, Module) | ||
| 199 | 218 | ||
| 200 | - const len = feat.len + model.len + 8 * 4; | 219 | + const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len; |
| 201 | const ptr = Module._malloc(len); | 220 | const ptr = Module._malloc(len); |
| 202 | 221 | ||
| 203 | let offset = 0; | 222 | let offset = 0; |
| @@ -243,8 +262,11 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | @@ -243,8 +262,11 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) { | ||
| 243 | Module.setValue(ptr + offset, config.hotwordsScore, 'float'); | 262 | Module.setValue(ptr + offset, config.hotwordsScore, 'float'); |
| 244 | offset += 4; | 263 | offset += 4; |
| 245 | 264 | ||
| 265 | + Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset); | ||
| 266 | + | ||
| 246 | return { | 267 | return { |
| 247 | - buffer: buffer, ptr: ptr, len: len, feat: feat, model: model | 268 | + buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, |
| 269 | + ctcFstDecoder: ctcFstDecoder | ||
| 248 | } | 270 | } |
| 249 | } | 271 | } |
| 250 | 272 | ||
| @@ -313,6 +335,10 @@ function createOnlineRecognizer(Module, myConfig) { | @@ -313,6 +335,10 @@ function createOnlineRecognizer(Module, myConfig) { | ||
| 313 | rule3MinUtteranceLength: 20, | 335 | rule3MinUtteranceLength: 20, |
| 314 | hotwordsFile: '', | 336 | hotwordsFile: '', |
| 315 | hotwordsScore: 1.5, | 337 | hotwordsScore: 1.5, |
| 338 | + ctcFstDecoderConfig: { | ||
| 339 | + graph: '', | ||
| 340 | + maxActive: 3000, | ||
| 341 | + } | ||
| 316 | }; | 342 | }; |
| 317 | if (myConfig) { | 343 | if (myConfig) { |
| 318 | recognizerConfig = myConfig; | 344 | recognizerConfig = myConfig; |
| @@ -22,9 +22,11 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == | @@ -22,9 +22,11 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) == | ||
| 22 | sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 5 * 4, | 22 | sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 5 * 4, |
| 23 | ""); | 23 | ""); |
| 24 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); | 24 | static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, ""); |
| 25 | +static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, ""); | ||
| 25 | static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) == | 26 | static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) == |
| 26 | sizeof(SherpaOnnxFeatureConfig) + | 27 | sizeof(SherpaOnnxFeatureConfig) + |
| 27 | - sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4, | 28 | + sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 + |
| 29 | + sizeof(SherpaOnnxOnlineCtcFstDecoderConfig), | ||
| 28 | ""); | 30 | ""); |
| 29 | 31 | ||
| 30 | void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | 32 | void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { |
| @@ -67,6 +69,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | @@ -67,6 +69,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) { | ||
| 67 | config->rule3_min_utterance_length); | 69 | config->rule3_min_utterance_length); |
| 68 | fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); | 70 | fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file); |
| 69 | fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score); | 71 | fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score); |
| 72 | + | ||
| 73 | + fprintf(stdout, "----------ctc fst decoder config----------\n"); | ||
| 74 | + fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph); | ||
| 75 | + fprintf(stdout, "max_active: %d\n", | ||
| 76 | + config->ctc_fst_decoder_config.max_active); | ||
| 70 | } | 77 | } |
| 71 | 78 | ||
| 72 | void CopyHeap(const char *src, int32_t num_bytes, char *dst) { | 79 | void CopyHeap(const char *src, int32_t num_bytes, char *dst) { |
-
请 注册 或 登录 后发表评论