Fangjun Kuang
Committed by GitHub

Add C API for streaming HLG decoding (#734)

正在显示 39 个修改的文件，包含 839 行增加、8 行删除
... ... @@ -2,7 +2,10 @@
cd dotnet-examples/
cd spoken-language-identification
cd streaming-hlg-decoding/
./run.sh
cd ../spoken-language-identification
./run.sh
cd ../online-decode-files
... ...
... ... @@ -58,6 +58,13 @@ rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
node ./test-online-zipformer2-ctc.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test-online-zipformer2-ctc-hlg.js
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
# offline tts
curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
... ...
... ... @@ -7,6 +7,10 @@ echo "pwd: $PWD"
cd swift-api-examples
ls -lh
./run-streaming-hlg-decode-file.sh
rm ./streaming-hlg-decode-file
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
./run-spoken-language-identification.sh
rm -rf sherpa-onnx-whisper*
... ... @@ -31,4 +35,5 @@ sed -i.bak '20d' ./decode-file.swift
./run-decode-file-non-streaming.sh
ls -lh
... ...
... ... @@ -178,6 +178,7 @@ jobs:
cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
ls -lh /tmp
... ...
... ... @@ -66,12 +66,77 @@ jobs:
run: |
gcc --version
- name: Test speaker identification
- name: Test streaming HLG decoding (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
run: |
cd go-api-examples/streaming-hlg-decoding/
./run.sh
- name: Test speaker identification (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
run: |
cd go-api-examples/speaker-identification
./run.sh
- name: Test speaker identification (Win64)
if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
shell: bash
run: |
cd go-api-examples/speaker-identification
go mod tidy
cat go.mod
go build
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
git clone https://github.com/csukuangfj/sr-data
ls -lh
echo $PWD
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
ls -lh
go mod tidy
go build
go run ./main.go
- name: Test speaker identification (Win32)
if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
shell: bash
run: |
cd go-api-examples/speaker-identification
go mod tidy
cat go.mod
ls -lh
go env GOARCH
go env
echo "------------------------------"
go env -w GOARCH=386
go env -w CGO_ENABLED=1
go env
go clean
go build
echo $PWD
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
git clone https://github.com/csukuangfj/sr-data
ls -lh
echo $PWD
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
ls -lh
go mod tidy
go build
go run ./main.go
rm -rf sr-data
rm -rf *.onnx
- name: Test non-streaming TTS (Linux/macOS)
if: matrix.os != 'windows-latest'
shell: bash
... ...
... ... @@ -74,6 +74,12 @@ jobs:
go mod tidy
go build
- name: Test streaming HLG decoding
shell: bash
run: |
cd scripts/go/_internal/streaming-hlg-decoding/
./run.sh
- name: Test speaker identification
shell: bash
run: |
... ...
... ... @@ -15,6 +15,9 @@ target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
add_executable(speaker-identification-c-api speaker-identification-c-api.c)
target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)
if(SHERPA_ONNX_HAS_ALSA)
add_subdirectory(./asr-microphone-example)
elseif((UNIX AND NOT APPLE) OR LINUX)
... ...
// c-api-examples/streaming-hlg-decode-file-c-api.c
//
// Copyright (c) 2024 Xiaomi Corporation
/*
We use the following model as an example
// clang-format off
Download the model from
https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
build/bin/streaming-hlg-decode-file-c-api
(The above model is from https://github.com/k2-fsa/icefall/pull/1557)
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "sherpa-onnx/c-api/c-api.h"
int32_t main() {
  // clang-format off
  //
  // Please download the model from
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  const char *model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
  const char *tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
  const char *graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
  const char *wav_filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
  // clang-format on

  // Zero-initialize so every field we do not set falls back to the
  // C API defaults inside CreateOnlineRecognizer().
  SherpaOnnxOnlineRecognizerConfig config;
  memset(&config, 0, sizeof(config));

  config.feat_config.sample_rate = 16000;
  config.feat_config.feature_dim = 80;
  config.model_config.zipformer2_ctc.model = model;
  config.model_config.tokens = tokens;
  config.model_config.num_threads = 1;
  config.model_config.provider = "cpu";
  config.model_config.debug = 0;

  // A non-empty graph path enables HLG (CTC FST) decoding.
  config.ctc_fst_decoder_config.graph = graph;

  const SherpaOnnxOnlineRecognizer *recognizer =
      CreateOnlineRecognizer(&config);
  if (!recognizer) {
    fprintf(stderr, "Failed to create recognizer\n");
    exit(-1);
  }

  const SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);

  const SherpaOnnxDisplay *display = CreateDisplay(50);
  int32_t segment_id = 0;

  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
  if (wave == NULL) {
    fprintf(stderr, "Failed to read %s\n", wav_filename);
    // Release everything created so far before bailing out.
    DestroyOnlineStream(stream);
    DestroyDisplay(display);
    DestroyOnlineRecognizer(recognizer);
    exit(-1);
  }

// simulate streaming. You can choose an arbitrary N
#define N 3200

  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
          wave->sample_rate, wave->num_samples,
          (float)wave->num_samples / wave->sample_rate);

  int32_t k = 0;
  while (k < wave->num_samples) {
    // Feed at most N samples per iteration to mimic a live audio source.
    int32_t start = k;
    int32_t end =
        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
    k += N;

    AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
                   end - start);
    while (IsOnlineStreamReady(recognizer, stream)) {
      DecodeOnlineStream(recognizer, stream);
    }

    const SherpaOnnxOnlineRecognizerResult *r =
        GetOnlineStreamResult(recognizer, stream);

    if (strlen(r->text)) {
      SherpaOnnxPrint(display, segment_id, r->text);
    }

    if (IsEndpoint(recognizer, stream)) {
      if (strlen(r->text)) {
        ++segment_id;
      }
      Reset(recognizer, stream);
    }

    DestroyOnlineRecognizerResult(r);
  }

  // add some tail padding
  float tail_paddings[4800] = {0};  // 0.3 seconds at 16 kHz sample rate
  AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);

  SherpaOnnxFreeWave(wave);

  InputFinished(stream);
  while (IsOnlineStreamReady(recognizer, stream)) {
    DecodeOnlineStream(recognizer, stream);
  }

  const SherpaOnnxOnlineRecognizerResult *r =
      GetOnlineStreamResult(recognizer, stream);

  if (strlen(r->text)) {
    SherpaOnnxPrint(display, segment_id, r->text);
  }

  DestroyOnlineRecognizerResult(r);
  DestroyDisplay(display);
  DestroyOnlineStream(stream);
  DestroyOnlineRecognizer(recognizer);

  fprintf(stderr, "\n");

  return 0;
}
... ...
... ... @@ -5,7 +5,7 @@ function(download_onnxruntime)
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
if(SHERPA_ONNX_ENABLE_WASM)
include(onnxruntime-wasm-simd)
include(onnxruntime-wasm-simd)
elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
if(BUILD_SHARED_LIBS)
include(onnxruntime-linux-riscv64)
... ...
... ... @@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
... ... @@ -48,5 +50,9 @@ Global
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal
... ...
// Copyright (c) 2024 Xiaomi Corporation
//
// This file shows how to do streaming HLG decoding.
//
// 1. Download the model for testing
//
// curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
// tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
// rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
//
// 2. Now run it
//
// dotnet run
using SherpaOnnx;
using System.Collections.Generic;
using System;
// Demonstrates streaming HLG (FST-based) decoding with a zipformer2 CTC model.
class StreamingHlgDecodingDemo
{
    static void Main(string[] args)
    {
        var config = new OnlineRecognizerConfig();
        // 16 kHz / 80-dim fbank features.
        config.FeatConfig.SampleRate = 16000;
        config.FeatConfig.FeatureDim = 80;

        config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
        config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
        config.ModelConfig.Provider = "cpu";
        config.ModelConfig.NumThreads = 1;
        config.ModelConfig.Debug = 0;

        // A non-empty graph path enables HLG decoding.
        config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";

        OnlineRecognizer recognizer = new OnlineRecognizer(config);

        var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
        WaveReader waveReader = new WaveReader(filename);

        OnlineStream s = recognizer.CreateStream();
        s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples);

        // 0.3 s of trailing silence so the final frames are decoded.
        float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)];
        s.AcceptWaveform(waveReader.SampleRate, tailPadding);
        s.InputFinished();

        while (recognizer.IsReady(s))
        {
            recognizer.Decode(s);
        }

        OnlineRecognizerResult r = recognizer.GetResult(s);
        var text = r.Text;
        var tokens = r.Tokens;

        Console.WriteLine("--------------------");
        Console.WriteLine(filename);
        Console.WriteLine("text: {0}", text);
        Console.WriteLine("tokens: [{0}]", string.Join(", ", tokens));
        Console.Write("timestamps: [");
        r.Timestamps.ToList().ForEach(i => Console.Write(String.Format("{0:0.00}", i) + ", "));
        Console.WriteLine("]");
        Console.WriteLine("--------------------");
    }
}
... ...
../online-decode-files/WaveReader.cs
\ No newline at end of file
... ...
#!/usr/bin/env bash
# Runs the streaming HLG decoding .NET example.
#
# Downloads the pre-trained streaming zipformer2 CTC model (with HLG.fst)
# on first use, then runs the example with `dotnet run`.

set -ex

# Download and unpack the model only if HLG.fst is not present yet.
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

dotnet run -c Release
... ...
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console example: streaming HLG decoding with sherpa-onnx. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <RootNamespace>streaming_hlg_decoding</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <ItemGroup>
    <!-- "*" picks the newest published sherpa-onnx package. -->
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
... ...
// Go module for the streaming HLG decoding example.
module streaming-hlg-decoding

go 1.12
... ...
package main
import (
"bytes"
"encoding/binary"
sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
"github.com/youpy/go-wav"
"log"
"os"
"strings"
)
// main decodes one wave file with a streaming zipformer2 CTC model plus an
// HLG graph (FST-based decoding) and prints the recognized text.
func main() {
	log.SetFlags(log.LstdFlags | log.Lmicroseconds)

	config := sherpa.OnlineRecognizerConfig{}
	// 16 kHz / 80-dim fbank features.
	config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}

	// please download model files from
	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
	config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
	config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"
	config.ModelConfig.NumThreads = 1
	config.ModelConfig.Debug = 0
	config.ModelConfig.Provider = "cpu"

	// A non-empty graph path enables HLG decoding.
	config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"

	wav_filename := "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
	samples, sampleRate := readWave(wav_filename)

	log.Println("Initializing recognizer (may take several seconds)")
	recognizer := sherpa.NewOnlineRecognizer(&config)
	log.Println("Recognizer created!")
	defer sherpa.DeleteOnlineRecognizer(recognizer)

	log.Println("Start decoding!")
	stream := sherpa.NewOnlineStream(recognizer)
	defer sherpa.DeleteOnlineStream(stream)

	stream.AcceptWaveform(sampleRate, samples)

	// 0.3 s of trailing silence so the last frames are flushed out.
	tailPadding := make([]float32, int(float32(sampleRate)*0.3))
	stream.AcceptWaveform(sampleRate, tailPadding)

	for recognizer.IsReady(stream) {
		recognizer.Decode(stream)
	}
	log.Println("Decoding done!")

	result := recognizer.GetResult(stream)
	log.Println(strings.ToLower(result.Text))
	log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
}
// readWave reads a 16-bit, single-channel PCM wave file and returns its
// samples (normalized to [-1, 1)) together with the sample rate.
//
// As this is example code it terminates the program on any error.
func readWave(filename string) (samples []float32, sampleRate int) {
	file, err := os.Open(filename)
	if err != nil {
		// Bug fix: the error used to be silently discarded (file, _ := ...),
		// which led to a confusing failure inside the wav reader when the
		// file was missing.
		log.Fatalf("Failed to open %v: %v", filename, err)
	}
	defer file.Close()

	reader := wav.NewReader(file)
	format, err := reader.Format()
	if err != nil {
		log.Fatalf("Failed to read wave format")
	}

	if format.AudioFormat != 1 {
		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
	}

	if format.NumChannels != 1 {
		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
	}

	if format.BitsPerSample != 16 {
		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
	}

	reader.Duration() // so that it initializes reader.Size

	buf := make([]byte, reader.Size)
	n, err := reader.Read(buf)
	if n != int(reader.Size) {
		log.Fatalf("Failed to read %v bytes. Returned %v bytes\n", reader.Size, n)
	}

	samples = samplesInt16ToFloat(buf)
	sampleRate = int(format.SampleRate)

	return
}
// samplesInt16ToFloat decodes little-endian 16-bit PCM bytes into float32
// samples scaled to the range [-1, 1). An odd trailing byte is ignored.
func samplesInt16ToFloat(inSamples []byte) []float32 {
	n := len(inSamples) / 2
	out := make([]float32, n)

	for i := 0; i < n; i++ {
		var sample int16
		r := bytes.NewReader(inSamples[2*i : 2*i+2])
		if err := binary.Read(r, binary.LittleEndian, &sample); err != nil {
			log.Fatal("Failed to parse 16-bit sample")
		}
		out[i] = float32(sample) / 32768
	}

	return out
}
... ...
#!/usr/bin/env bash
# Builds and runs the streaming HLG decoding Go example.
#
# Downloads the pre-trained streaming zipformer2 CTC model (with HLG.fst)
# on first use.

set -ex

# Download and unpack the model only if HLG.fst is not present yet.
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

go mod tidy
go build
ls -lh

./streaming-hlg-decoding
... ...
... ... @@ -174,3 +174,16 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
node ./test-online-zipformer2-ctc.js
```
## ./test-online-zipformer2-ctc-hlg.js
[./test-online-zipformer2-ctc-hlg.js](./test-online-zipformer2-ctc-hlg.js) demonstrates
how to decode a file using a streaming zipformer2 CTC model with HLG. In the code
we use [sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2).
You can use the following command to run it:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
node ./test-online-zipformer2-ctc-hlg.js
```
... ...
... ... @@ -50,6 +50,10 @@ function createOnlineRecognizer() {
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
}
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
... ...
... ... @@ -51,6 +51,10 @@ function createOnlineRecognizer() {
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
}
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
... ...
... ... @@ -52,6 +52,10 @@ function createOnlineRecognizer() {
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
}
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
... ...
... ... @@ -53,6 +53,10 @@ function createOnlineRecognizer() {
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
}
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
... ...
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
// Builds an online recognizer configured for streaming zipformer2 CTC
// decoding with an HLG graph (FST-based decoding).
function createOnlineRecognizer() {
  // One nested literal instead of separately staged config objects; the
  // resulting structure passed to sherpa_onnx is identical.
  const recognizerConfig = {
    featConfig: {
      sampleRate: 16000,
      featureDim: 80,
    },
    modelConfig: {
      // Unused model types are left empty.
      transducer: {encoder: '', decoder: '', joiner: ''},
      paraformer: {encoder: '', decoder: ''},
      zipformer2Ctc: {
        model:
            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      tokens:
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
      numThreads: 1,
      provider: 'cpu',
      debug: 0,
      modelType: '',
    },
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
    hotwordsFile: '',
    hotwordsScore: 1.5,
    // A non-empty graph path enables HLG decoding.
    ctcFstDecoderConfig: {
      graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
      maxActive: 3000,
    }
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';

const reader = new wav.Reader();
const readable = new Readable().wrap(reader);

// Feeds one chunk of float samples to the stream, decodes as much as
// possible, and prints the current (partial) result.
function decode(samples) {
  stream.acceptWaveform(gSampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const text = recognizer.getResult(stream);
  console.log(text);
}

// Updated from the wave header so decoding uses the file's real sample rate.
let gSampleRate = 16000;

reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
  gSampleRate = sampleRate;
  if (audioFormat != 1) {
    throw new Error(`Only support PCM format. Given ${audioFormat}`);
  }
  if (channels != 1) {
    // Bug fix: previously referenced an undefined variable `channel`,
    // which itself threw a ReferenceError for multi-channel input.
    throw new Error(`Only a single channel. Given ${channels}`);
  }
  if (bitDepth != 16) {
    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
  }
});

fs.createReadStream(waveFilename, {'highWaterMark': 4096})
    .pipe(reader)
    .on('finish', function(err) {
      // Tail padding: 0.5 s of silence to flush out the final frames.
      // NOTE(review): this uses the configured featConfig sample rate, not
      // gSampleRate from the file header — confirm that is intended.
      const floatSamples =
          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
      decode(floatSamples);
      stream.free();
      recognizer.free();
    });

readable.on('readable', function() {
  let chunk;
  while ((chunk = readable.read()) != null) {
    // Reinterpret the raw bytes as 16-bit PCM and normalize to [-1, 1).
    const int16Samples = new Int16Array(
        chunk.buffer, chunk.byteOffset,
        chunk.length / Int16Array.BYTES_PER_ELEMENT);

    const floatSamples = new Float32Array(int16Samples.length);
    for (let i = 0; i < floatSamples.length; i++) {
      floatSamples[i] = int16Samples[i] / 32768.0;
    }

    decode(floatSamples);
  }
});
... ...
... ... @@ -51,6 +51,10 @@ function createOnlineRecognizer() {
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
}
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
... ...
<Project Sdk="Microsoft.NET.Sdk">

  <!-- Console example: streaming HLG decoding with sherpa-onnx. -->
  <PropertyGroup>
    <OutputType>Exe</OutputType>
    <TargetFramework>net6.0</TargetFramework>
    <RootNamespace>streaming_hlg_decoding</RootNamespace>
    <ImplicitUsings>enable</ImplicitUsings>
    <Nullable>enable</Nullable>
  </PropertyGroup>

  <!-- Prefer locally built packages in /tmp/packages (CI) before nuget.org. -->
  <PropertyGroup>
    <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
  </PropertyGroup>

  <ItemGroup>
    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
  </ItemGroup>

</Project>
... ...
... ... @@ -117,6 +117,21 @@ namespace SherpaOnnx
}
[StructLayout(LayoutKind.Sequential)]
public struct OnlineCtcFstDecoderConfig
{
    /// <summary>
    /// Creates a config with the defaults: empty graph (FST decoding
    /// disabled) and MaxActive = 3000.
    /// </summary>
    public OnlineCtcFstDecoderConfig()
    {
        Graph = "";
        MaxActive = 3000;
    }

    // Path to the decoding graph, e.g., HLG.fst. An empty string leaves
    // CTC FST decoding disabled.
    [MarshalAs(UnmanagedType.LPStr)]
    public string Graph;

    // Upper bound on active states kept during FST decoding.
    public int MaxActive;
}
[StructLayout(LayoutKind.Sequential)]
public struct OnlineRecognizerConfig
{
public OnlineRecognizerConfig()
... ... @@ -131,6 +146,7 @@ namespace SherpaOnnx
Rule3MinUtteranceLength = 20.0F;
HotwordsFile = "";
HotwordsScore = 1.5F;
CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
}
public FeatureConfig FeatConfig;
public OnlineModelConfig ModelConfig;
... ... @@ -167,6 +183,8 @@ namespace SherpaOnnx
/// Bonus score for each token in hotwords.
public float HotwordsScore;
public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
}
public class OnlineRecognizerResult
... ...
// Go module for the streaming HLG decoding example (in-repo test copy).
module streaming-hlg-decoding

go 1.12

// Use the local checkout of the Go bindings instead of the published module.
replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
... ...
../../../../go-api-examples/streaming-hlg-decoding/main.go
\ No newline at end of file
... ...
../../../../go-api-examples/streaming-hlg-decoding/run.sh
\ No newline at end of file
... ...
... ... @@ -99,6 +99,11 @@ type FeatureConfig struct {
FeatureDim int
}
// Configuration for CTC FST (e.g., HLG) decoding of online models.
type OnlineCtcFstDecoderConfig struct {
	// Path to the decoding graph, e.g., HLG.fst. Empty leaves FST
	// decoding disabled.
	Graph string
	// Upper bound on active states kept during FST decoding.
	MaxActive int
}
// Configuration for the online/streaming recognizer.
type OnlineRecognizerConfig struct {
FeatConfig FeatureConfig
... ... @@ -120,6 +125,7 @@ type OnlineRecognizerConfig struct {
Rule1MinTrailingSilence float32
Rule2MinTrailingSilence float32
Rule3MinUtteranceLength float32
CtcFstDecoderConfig OnlineCtcFstDecoderConfig
}
// It contains the recognition result for a online stream.
... ... @@ -190,6 +196,10 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)
c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph)
defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
recognizer := &OnlineRecognizer{}
recognizer.impl = C.CreateOnlineRecognizer(&c)
... ...
... ... @@ -99,6 +99,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
recognizer_config.hotwords_score =
SHERPA_ONNX_OR(config->hotwords_score, 1.5);
recognizer_config.ctc_fst_decoder_config.graph =
SHERPA_ONNX_OR(config->ctc_fst_decoder_config.graph, "");
recognizer_config.ctc_fst_decoder_config.max_active =
SHERPA_ONNX_OR(config->ctc_fst_decoder_config.max_active, 3000);
if (config->model_config.debug) {
SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
}
... ...
... ... @@ -96,6 +96,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
int32_t feature_dim;
} SherpaOnnxFeatureConfig;
/// Configuration for CTC FST (e.g., HLG) decoding of online models.
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineCtcFstDecoderConfig {
  /// Path to the decoding graph, e.g., HLG.fst. An empty string leaves
  /// FST decoding disabled.
  const char *graph;

  /// Upper bound on active states kept during decoding. 0 means the
  /// default (3000) is used.
  int32_t max_active;
} SherpaOnnxOnlineCtcFstDecoderConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
SherpaOnnxFeatureConfig feat_config;
SherpaOnnxOnlineModelConfig model_config;
... ... @@ -131,6 +136,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
/// Bonus score for each token in hotwords.
float hotwords_score;
SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config;
} SherpaOnnxOnlineRecognizerConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
... ...
... ... @@ -7,3 +7,4 @@ vits-vctk
sherpa-onnx-paraformer-zh-2023-09-14
!*.sh
*.bak
streaming-hlg-decode-file
... ...
... ... @@ -111,6 +111,15 @@ func sherpaOnnxFeatureConfig(
feature_dim: Int32(featureDim))
}
/// Builds a SherpaOnnxOnlineCtcFstDecoderConfig for HLG (FST-based) decoding.
///
/// - Parameters:
///   - graph: Path to the decoding graph, e.g., HLG.fst. Empty (the
///     default) leaves FST decoding disabled.
///   - maxActive: Upper bound on active decoder states (default 3000).
func sherpaOnnxOnlineCtcFstDecoderConfig(
  graph: String = "",
  maxActive: Int = 3000
) -> SherpaOnnxOnlineCtcFstDecoderConfig {
  return SherpaOnnxOnlineCtcFstDecoderConfig(
    graph: toCPointer(graph),
    max_active: Int32(maxActive))
}
func sherpaOnnxOnlineRecognizerConfig(
featConfig: SherpaOnnxFeatureConfig,
modelConfig: SherpaOnnxOnlineModelConfig,
... ... @@ -121,7 +130,8 @@ func sherpaOnnxOnlineRecognizerConfig(
decodingMethod: String = "greedy_search",
maxActivePaths: Int = 4,
hotwordsFile: String = "",
hotwordsScore: Float = 1.5
hotwordsScore: Float = 1.5,
ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig()
) -> SherpaOnnxOnlineRecognizerConfig {
return SherpaOnnxOnlineRecognizerConfig(
feat_config: featConfig,
... ... @@ -133,7 +143,9 @@ func sherpaOnnxOnlineRecognizerConfig(
rule2_min_trailing_silence: rule2MinTrailingSilence,
rule3_min_utterance_length: rule3MinUtteranceLength,
hotwords_file: toCPointer(hotwordsFile),
hotwords_score: hotwordsScore)
hotwords_score: hotwordsScore,
ctc_fst_decoder_config: ctcFstDecoderConfig
)
}
/// Wrapper for recognition result.
... ...
#!/usr/bin/env bash
# Builds (if needed) and runs the Swift streaming HLG decoding example
# against the locally built sherpa-onnx libraries in ../build-swift-macos.

set -ex

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Download and unpack the pre-trained model only if HLG.fst is missing.
if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
  echo "Downloading the pre-trained model for testing."

  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
fi

# Compile only when the binary does not exist yet.
if [ ! -e ./streaming-hlg-decode-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./streaming-hlg-decode-file.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o streaming-hlg-decode-file

  strip ./streaming-hlg-decode-file
else
  echo "./streaming-hlg-decode-file exists - skip building"
fi

# The shared libraries live in the local install dir, not a system path.
export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH

./streaming-hlg-decode-file
... ...
import AVFoundation
extension AudioBuffer {
  /// Copies the buffer's contents into a Swift [Float] array.
  /// Assumes the underlying data is float32 — callers check the file's
  /// common format before using this.
  func array() -> [Float] {
    return Array(UnsafeBufferPointer(self))
  }
}

extension AVAudioPCMBuffer {
  /// Returns the samples of the first (mono) buffer as [Float].
  /// NOTE(review): only mBuffers (the first buffer) is read, so this is
  /// correct only for single-channel audio — confirmed by the caller's
  /// channelCount == 1 assertion.
  func array() -> [Float] {
    return self.audioBufferList.pointee.mBuffers.array()
  }
}
/// Decodes one wave file with a streaming zipformer2 CTC model plus an
/// HLG graph (FST-based decoding) and prints the final result.
func run() {
  let filePath =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
  let model =
    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
  let tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"

  let zipfomer2CtcModelConfig = sherpaOnnxOnlineZipformer2CtcModelConfig(
    model: model
  )

  let modelConfig = sherpaOnnxOnlineModelConfig(
    tokens: tokens,
    zipformer2Ctc: zipfomer2CtcModelConfig
  )

  // 16 kHz / 80-dim fbank features.
  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  // A non-empty graph path enables HLG decoding.
  let ctcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(
    graph: "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst",
    maxActive: 3000
  )

  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    ctcFstDecoderConfig: ctcFstDecoderConfig
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
  let audioFile = try! AVAudioFile(forReading: fileURL as URL)

  let audioFormat = audioFile.processingFormat
  // Only mono float32 input is supported by this example.
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)

  try! audioFile.read(into: audioFileBuffer!)
  let array: [Float]! = audioFileBuffer?.array()
  recognizer.acceptWaveform(samples: array, sampleRate: Int(audioFormat.sampleRate))

  // Tail padding (silence) so the last frames are flushed out of the model.
  let tailPadding = [Float](repeating: 0.0, count: 3200)
  recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate))

  recognizer.inputFinished()
  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
}
/// Program entry point: runs the streaming HLG decoding demo once.
@main
struct App {
  static func main() {
    run()
  }
}
... ...
... ... @@ -43,6 +43,10 @@ function freeConfig(config, Module) {
freeConfig(config.lm, Module)
}
if ('ctcFstDecoder' in config) {
freeConfig(config.ctcFstDecoder, Module)
}
Module._free(config.ptr);
}
... ... @@ -193,11 +197,26 @@ function initSherpaOnnxFeatureConfig(config, Module) {
return {ptr: ptr, len: len};
}
// Serializes an online CTC FST decoder config into wasm heap memory.
//
// The layout must match SherpaOnnxOnlineCtcFstDecoderConfig in the C API:
// a 4-byte char* (graph) followed by an int32 (max_active) on wasm32.
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
  const len = 2 * 4;  // two 4-byte fields
  const ptr = Module._malloc(len);

  // Copy the graph path into its own NUL-terminated heap buffer.
  const graphLen = Module.lengthBytesUTF8(config.graph) + 1;
  const buffer = Module._malloc(graphLen);
  Module.stringToUTF8(config.graph, buffer, graphLen);

  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.maxActive, 'i32');

  // NOTE(review): `buffer` is returned alongside `ptr`; confirm the
  // caller's freeConfig releases both to avoid a heap leak.
  return {ptr: ptr, len: len, buffer: buffer};
}
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
config.ctcFstDecoderConfig, Module)
const len = feat.len + model.len + 8 * 4;
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len;
const ptr = Module._malloc(len);
let offset = 0;
... ... @@ -243,8 +262,11 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.hotwordsScore, 'float');
offset += 4;
Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
ctcFstDecoder: ctcFstDecoder
}
}
... ... @@ -313,6 +335,10 @@ function createOnlineRecognizer(Module, myConfig) {
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
}
};
if (myConfig) {
recognizerConfig = myConfig;
... ...
... ... @@ -22,9 +22,11 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 5 * 4,
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4,
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig),
"");
void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
... ... @@ -67,6 +69,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
config->rule3_min_utterance_length);
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
fprintf(stdout, "----------ctc fst decoder config----------\n");
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
fprintf(stdout, "max_active: %d\n",
config->ctc_fst_decoder_config.max_active);
}
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
... ...