Add C API for streaming HLG decoding (#734)

Fangjun Kuang · GitHub
Commit dbff2eaadba78729c7660c0cd0cbf2f5f0252007 dbff2eaa 1 parent db67e00c
.github/scripts/test-dot-net.sh
.github/scripts/test-nodejs-npm.sh
.github/scripts/test-swift.sh
.github/workflows/test-dot-net.yaml
.github/workflows/test-go-package.yaml
.github/workflows/test-go.yaml
c-api-examples/CMakeLists.txt
c-api-examples/streaming-hlg-decode-file-c-api.c
cmake/onnxruntime.cmake
dotnet-examples/sherpa-onnx.sln
dotnet-examples/streaming-hlg-decoding/Program.cs
dotnet-examples/streaming-hlg-decoding/WaveReader.cs
dotnet-examples/streaming-hlg-decoding/run.sh
dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj
go-api-examples/streaming-hlg-decoding/go.mod
go-api-examples/streaming-hlg-decoding/main.go
go-api-examples/streaming-hlg-decoding/run.sh
nodejs-examples/README.md
nodejs-examples/test-online-paraformer-microphone.js
nodejs-examples/test-online-paraformer.js
--- a/.github/scripts/test-dot-net.sh
查看文件 @dbff2ea
+++ b/.github/scripts/test-dot-net.sh
查看文件 @dbff2ea
@@ -2,7 +2,10 @@
 cd dotnet-examples/
-cd spoken-language-identification
+cd streaming-hlg-decoding/
+./run.sh
+
+cd ../spoken-language-identification
 ./run.sh
 cd ../online-decode-files
--- a/.github/scripts/test-nodejs-npm.sh
查看文件 @dbff2ea
+++ b/.github/scripts/test-nodejs-npm.sh
查看文件 @dbff2ea
@@ -58,6 +58,13 @@ rm sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
 node ./test-online-zipformer2-ctc.js
 rm -rf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
+
+curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+node ./test-online-zipformer2-ctc-hlg.js
+rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
+
 # offline tts
 curl -LS -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
--- a/.github/scripts/test-swift.sh
查看文件 @dbff2ea
+++ b/.github/scripts/test-swift.sh
查看文件 @dbff2ea
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
+./run-streaming-hlg-decode-file.sh
+rm ./streaming-hlg-decode-file
+rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
+
 ./run-spoken-language-identification.sh
 rm -rf sherpa-onnx-whisper*
@@ -31,4 +35,5 @@ sed -i.bak  '20d' ./decode-file.swift
 ./run-decode-file-non-streaming.sh
+
 ls -lh
--- a/.github/workflows/test-dot-net.yaml
查看文件 @dbff2ea
+++ b/.github/workflows/test-dot-net.yaml
查看文件 @dbff2ea
@@ -178,6 +178,7 @@ jobs:
           cp -v scripts/dotnet/examples/online-decode-files.csproj dotnet-examples/online-decode-files/
           cp -v scripts/dotnet/examples/speech-recognition-from-microphone.csproj dotnet-examples/speech-recognition-from-microphone/
           cp -v scripts/dotnet/examples/spoken-language-identification.csproj dotnet-examples/spoken-language-identification/
+          cp -v scripts/dotnet/examples/streaming-hlg-decoding.csproj dotnet-examples/streaming-hlg-decoding
           ls -lh /tmp
--- a/.github/workflows/test-go-package.yaml
查看文件 @dbff2ea
+++ b/.github/workflows/test-go-package.yaml
查看文件 @dbff2ea
@@ -66,12 +66,77 @@ jobs:
         run: |
           gcc --version
-      - name: Test speaker identification
+      - name: Test streaming HLG decoding (Linux/macOS)
+        if: matrix.os != 'windows-latest'
+        shell: bash
+        run: |
+          cd go-api-examples/streaming-hlg-decoding/
+          ./run.sh
+
+      - name: Test speaker identification (Linux/macOS)
+        if: matrix.os != 'windows-latest'
         shell: bash
         run: |
           cd go-api-examples/speaker-identification
           ./run.sh
+      - name: Test speaker identification (Win64)
+        if: matrix.os == 'windows-latest' && matrix.arch == 'x64'
+        shell: bash
+        run: |
+          cd go-api-examples/speaker-identification
+          go mod tidy
+          cat go.mod
+          go build
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
+          git clone https://github.com/csukuangfj/sr-data
+          ls -lh
+          echo $PWD
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
+          ls -lh
+          go mod tidy
+          go build
+          go run ./main.go
+
+      - name: Test speaker identification (Win32)
+        if: matrix.os == 'windows-latest' && matrix.arch == 'x86'
+        shell: bash
+        run: |
+          cd go-api-examples/speaker-identification
+          go mod tidy
+          cat go.mod
+          ls -lh
+
+          go env GOARCH
+          go env
+          echo "------------------------------"
+          go env -w GOARCH=386
+          go env -w CGO_ENABLED=1
+          go env
+
+          go clean
+          go build
+
+          echo $PWD
+
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_campplus_sv_zh-cn_16k-common.onnx
+          git clone https://github.com/csukuangfj/sr-data
+          ls -lh
+          echo $PWD
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/
+          ls -lh /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/*
+          cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
+          ls -lh
+          go mod tidy
+          go build
+          go run ./main.go
+
+          rm -rf sr-data
+          rm -rf *.onnx
+
       - name: Test non-streaming TTS (Linux/macOS)
         if: matrix.os != 'windows-latest'
         shell: bash
--- a/.github/workflows/test-go.yaml
查看文件 @dbff2ea
+++ b/.github/workflows/test-go.yaml
查看文件 @dbff2ea
@@ -74,6 +74,12 @@ jobs:
           go mod tidy
           go build
+      - name: Test streaming HLG decoding
+        shell: bash
+        run: |
+          cd scripts/go/_internal/streaming-hlg-decoding/
+          ./run.sh
+
       - name: Test speaker identification
         shell: bash
         run: |
--- a/c-api-examples/CMakeLists.txt
查看文件 @dbff2ea
+++ b/c-api-examples/CMakeLists.txt
查看文件 @dbff2ea
@@ -15,6 +15,9 @@ target_link_libraries(spoken-language-identification-c-api sherpa-onnx-c-api)
 add_executable(speaker-identification-c-api speaker-identification-c-api.c)
 target_link_libraries(speaker-identification-c-api sherpa-onnx-c-api)
+add_executable(streaming-hlg-decode-file-c-api streaming-hlg-decode-file-c-api.c)
+target_link_libraries(streaming-hlg-decode-file-c-api sherpa-onnx-c-api)
+
 if(SHERPA_ONNX_HAS_ALSA)
   add_subdirectory(./asr-microphone-example)
 elseif((UNIX AND NOT APPLE) OR LINUX)
--- a/c-api-examples/streaming-hlg-decode-file-c-api.c 0 → 100644
查看文件 @dbff2ea
+++ b/c-api-examples/streaming-hlg-decode-file-c-api.c 0 → 100644
查看文件 @dbff2ea
+// c-api-examples/streaming-hlg-decode-file-c-api.c
+//
+// Copyright (c)  2024  Xiaomi Corporation
+/*
+We use the following model as an example
+
+// clang-format off
+
+Download the model from
+https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+
+tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+
+build/bin/streaming-hlg-decode-file-c-api
+
+(The above model is from https://github.com/k2-fsa/icefall/pull/1557)
+*/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+// Decodes a wave file chunk by chunk with a streaming zipformer2 CTC model
+// and an HLG decoding graph, printing the partial result after every chunk.
+//
+// All file paths are hard-coded below; download and extract the model first.
+//
+// Returns 0 on success. Exits with -1 if the recognizer cannot be created or
+// the wave file cannot be read.
+int32_t main() {
+  // clang-format off
+  //
+  // Please download the model from
+  // https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  const char *model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
+  const char *tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
+  const char *graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
+  const char *wav_filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
+  // clang-format on
+
+  SherpaOnnxOnlineRecognizerConfig config;
+
+  // Zero every field first so that anything not set explicitly below is 0/NULL.
+  memset(&config, 0, sizeof(config));
+  config.feat_config.sample_rate = 16000;
+  config.feat_config.feature_dim = 80;
+  config.model_config.zipformer2_ctc.model = model;
+  config.model_config.tokens = tokens;
+  config.model_config.num_threads = 1;
+  config.model_config.provider = "cpu";
+  config.model_config.debug = 0;
+  config.ctc_fst_decoder_config.graph = graph;
+  const SherpaOnnxOnlineRecognizer *recognizer =
+      CreateOnlineRecognizer(&config);
+  if (!recognizer) {
+    fprintf(stderr, "Failed to create recognizer\n");
+    exit(-1);
+  }
+
+  const SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer);
+
+  const SherpaOnnxDisplay *display = CreateDisplay(50);
+  int32_t segment_id = 0;
+
+  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+  if (wave == NULL) {
+    fprintf(stderr, "Failed to read %s\n", wav_filename);
+    // Release everything created so far before giving up.
+    DestroyDisplay(display);
+    DestroyOnlineStream(stream);
+    DestroyOnlineRecognizer(recognizer);
+    exit(-1);
+  }
+
+// simulate streaming. You can choose an arbitrary N
+#define N 3200
+
+  fprintf(stderr, "sample rate: %d, num samples: %d, duration: %.2f s\n",
+          wave->sample_rate, wave->num_samples,
+          (float)wave->num_samples / wave->sample_rate);
+
+  int32_t k = 0;
+  while (k < wave->num_samples) {
+    // [start, end) is the current chunk; the last chunk may be shorter than N.
+    int32_t start = k;
+    int32_t end =
+        (start + N > wave->num_samples) ? wave->num_samples : (start + N);
+    k += N;
+
+    AcceptWaveform(stream, wave->sample_rate, wave->samples + start,
+                   end - start);
+    while (IsOnlineStreamReady(recognizer, stream)) {
+      DecodeOnlineStream(recognizer, stream);
+    }
+
+    const SherpaOnnxOnlineRecognizerResult *r =
+        GetOnlineStreamResult(recognizer, stream);
+
+    if (strlen(r->text)) {
+      SherpaOnnxPrint(display, segment_id, r->text);
+    }
+
+    if (IsEndpoint(recognizer, stream)) {
+      // Only advance to a new segment if the finished one produced text.
+      if (strlen(r->text)) {
+        ++segment_id;
+      }
+      Reset(recognizer, stream);
+    }
+
+    DestroyOnlineRecognizerResult(r);
+  }
+
+  // Add some tail padding: 4800 zero samples. They are fed at the wave's own
+  // sample rate, so this is 0.3 seconds only when that rate is 16 kHz.
+  float tail_paddings[4800] = {0};
+  AcceptWaveform(stream, wave->sample_rate, tail_paddings, 4800);
+
+  SherpaOnnxFreeWave(wave);
+
+  // Signal end of input, then decode whatever is still ready.
+  InputFinished(stream);
+  while (IsOnlineStreamReady(recognizer, stream)) {
+    DecodeOnlineStream(recognizer, stream);
+  }
+
+  const SherpaOnnxOnlineRecognizerResult *r =
+      GetOnlineStreamResult(recognizer, stream);
+
+  if (strlen(r->text)) {
+    SherpaOnnxPrint(display, segment_id, r->text);
+  }
+
+  DestroyOnlineRecognizerResult(r);
+
+  DestroyDisplay(display);
+  DestroyOnlineStream(stream);
+  DestroyOnlineRecognizer(recognizer);
+  fprintf(stderr, "\n");
+
+  return 0;
+}
--- a/cmake/onnxruntime.cmake
查看文件 @dbff2ea
+++ b/cmake/onnxruntime.cmake
查看文件 @dbff2ea
@@ -5,7 +5,7 @@ function(download_onnxruntime)
   message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
   message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
   if(SHERPA_ONNX_ENABLE_WASM)
-      include(onnxruntime-wasm-simd)
+    include(onnxruntime-wasm-simd)
   elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL riscv64)
     if(BUILD_SHARED_LIBS)
       include(onnxruntime-linux-riscv64)
--- a/dotnet-examples/sherpa-onnx.sln
查看文件 @dbff2ea
+++ b/dotnet-examples/sherpa-onnx.sln
查看文件 @dbff2ea
@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-tts-play", "offline
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "spoken-language-identification", "spoken-language-identification\spoken-language-identification.csproj", "{3D7CF3D6-AC45-4D50-9619-5687B1443E94}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "streaming-hlg-decoding", "streaming-hlg-decoding\streaming-hlg-decoding.csproj", "{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -48,5 +50,9 @@ Global
 		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{3D7CF3D6-AC45-4D50-9619-5687B1443E94}.Release|Any CPU.Build.0 = Release|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Debug|Any CPU.Build.0 = Debug|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.ActiveCfg = Release|Any CPU
+		{C4A368A5-FCA0-419D-97C9-C8CE0B08EB99}.Release|Any CPU.Build.0 = Release|Any CPU
 	EndGlobalSection
 EndGlobal
--- a/dotnet-examples/streaming-hlg-decoding/Program.cs 0 → 100644
查看文件 @dbff2ea
+++ b/dotnet-examples/streaming-hlg-decoding/Program.cs 0 → 100644
查看文件 @dbff2ea
+// Copyright (c)  2024  Xiaomi Corporation
+//
+// This file shows how to do streaming HLG decoding.
+//
+// 1. Download the model for testing
+//
+//  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+//  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+//  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+//
+// 2. Now run it
+//
+// dotnet run
+
+using SherpaOnnx;
+using System.Collections.Generic;
+using System;
+
+// Decodes one wave file with a streaming zipformer2 CTC model and an HLG
+// decoding graph, then prints the text, tokens, and timestamps.
+class StreamingHlgDecodingDemo
+{
+
+  static void Main(string[] args)
+  {
+    var filename = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav";
+
+    // Feature extraction settings.
+    var config = new OnlineRecognizerConfig();
+    config.FeatConfig.SampleRate = 16000;
+    config.FeatConfig.FeatureDim = 80;
+
+    // Model files.
+    config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx";
+    config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt";
+    config.ModelConfig.Provider = "cpu";
+    config.ModelConfig.NumThreads = 1;
+    config.ModelConfig.Debug = 0;
+
+    // Decoding graph used by the CTC FST decoder.
+    config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst";
+
+    var recognizer = new OnlineRecognizer(config);
+    var wave = new WaveReader(filename);
+    var stream = recognizer.CreateStream();
+
+    // Feed the whole file, followed by 0.3 seconds of zero samples.
+    stream.AcceptWaveform(wave.SampleRate, wave.Samples);
+    var silence = new float[(int)(wave.SampleRate * 0.3)];
+    stream.AcceptWaveform(wave.SampleRate, silence);
+    stream.InputFinished();
+
+    while (recognizer.IsReady(stream))
+    {
+      recognizer.Decode(stream);
+    }
+
+    var result = recognizer.GetResult(stream);
+    var recognizedText = result.Text;
+    var recognizedTokens = result.Tokens;
+
+    Console.WriteLine("--------------------");
+    Console.WriteLine(filename);
+    Console.WriteLine("text: {0}", recognizedText);
+    Console.WriteLine("tokens: [{0}]", string.Join(", ", recognizedTokens));
+    Console.Write("timestamps: [");
+    // Each timestamp is printed with two decimals and a trailing ", ".
+    foreach (var timestamp in result.Timestamps)
+    {
+      Console.Write(String.Format("{0:0.00}", timestamp) + ", ");
+    }
+    Console.WriteLine("]");
+    Console.WriteLine("--------------------");
+  }
+}
+
+
--- a/dotnet-examples/streaming-hlg-decoding/WaveReader.cs 0 → 120000
查看文件 @dbff2ea
+++ b/dotnet-examples/streaming-hlg-decoding/WaveReader.cs 0 → 120000
查看文件 @dbff2ea
+../online-decode-files/WaveReader.cs
--- a/dotnet-examples/streaming-hlg-decoding/run.sh 0 → 100755
查看文件 @dbff2ea
+++ b/dotnet-examples/streaming-hlg-decoding/run.sh 0 → 100755
查看文件 @dbff2ea
+#!/usr/bin/env bash
+#
+# Downloads the streaming zipformer CTC model if it is not already present,
+# then builds and runs this example in Release configuration.
+
+# Exit on the first failing command and echo each command as it runs.
+set -ex
+
+# HLG.fst serves as the marker that the model archive was already extracted.
+if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+fi
+
+dotnet run -c Release
--- a/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj 0 → 100644
查看文件 @dbff2ea
+++ b/dotnet-examples/streaming-hlg-decoding/streaming-hlg-decoding.csproj 0 → 100644
查看文件 @dbff2ea
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net6.0</TargetFramework>
+    <RootNamespace>streaming_hlg_decoding</RootNamespace>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
+  </ItemGroup>
+
+</Project>
--- a/go-api-examples/streaming-hlg-decoding/go.mod 0 → 100644
查看文件 @dbff2ea
+++ b/go-api-examples/streaming-hlg-decoding/go.mod 0 → 100644
查看文件 @dbff2ea
+module streaming-hlg-decoding
+
+go 1.12
--- a/go-api-examples/streaming-hlg-decoding/main.go 0 → 100644
查看文件 @dbff2ea
+++ b/go-api-examples/streaming-hlg-decoding/main.go 0 → 100644
查看文件 @dbff2ea
+package main
+
+import (
+	"bytes"
+	"encoding/binary"
+	sherpa "github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx"
+	"github.com/youpy/go-wav"
+	"log"
+	"os"
+	"strings"
+)
+
+// main decodes a single wave file with a streaming zipformer2 CTC model and
+// an HLG decoding graph, then logs the lower-cased text and the wave duration.
+func main() {
+	log.SetFlags(log.LstdFlags | log.Lmicroseconds)
+
+	config := sherpa.OnlineRecognizerConfig{}
+	config.FeatConfig = sherpa.FeatureConfig{SampleRate: 16000, FeatureDim: 80}
+
+	// please download model files from
+	// https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+	config.ModelConfig.Zipformer2Ctc.Model = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
+	config.ModelConfig.Tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"
+
+	config.ModelConfig.NumThreads = 1
+	config.ModelConfig.Debug = 0
+	config.ModelConfig.Provider = "cpu"
+	// Decoding graph used by the CTC FST decoder.
+	config.CtcFstDecoderConfig.Graph = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst"
+
+	wav_filename := "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
+
+	// Load the whole file up front; readWave terminates the program on failure.
+	samples, sampleRate := readWave(wav_filename)
+
+	log.Println("Initializing recognizer (may take several seconds)")
+	recognizer := sherpa.NewOnlineRecognizer(&config)
+	log.Println("Recognizer created!")
+	defer sherpa.DeleteOnlineRecognizer(recognizer)
+
+	log.Println("Start decoding!")
+	stream := sherpa.NewOnlineStream(recognizer)
+	defer sherpa.DeleteOnlineStream(stream)
+
+	// Feed all samples at once instead of chunk by chunk.
+	stream.AcceptWaveform(sampleRate, samples)
+
+	// Append 0.3 seconds of zero samples after the real audio.
+	tailPadding := make([]float32, int(float32(sampleRate)*0.3))
+	stream.AcceptWaveform(sampleRate, tailPadding)
+
+	// Decode until the recognizer reports that nothing more is ready.
+	for recognizer.IsReady(stream) {
+		recognizer.Decode(stream)
+	}
+	log.Println("Decoding done!")
+	result := recognizer.GetResult(stream)
+	log.Println(strings.ToLower(result.Text))
+	log.Printf("Wave duration: %v seconds", float32(len(samples))/float32(sampleRate))
+}
+
+// readWave loads a wave file and returns its samples scaled to [-1, 1)
+// together with the sample rate from the header.
+//
+// Only single-channel, 16-bit PCM files are accepted. Any failure (file cannot
+// be opened, unsupported format, short read) terminates the program via
+// log.Fatalf.
+func readWave(filename string) (samples []float32, sampleRate int) {
+	// Report an open failure directly instead of letting it surface later as
+	// a misleading "failed to read wave format" error.
+	file, err := os.Open(filename)
+	if err != nil {
+		log.Fatalf("Failed to open %v: %v", filename, err)
+	}
+	defer file.Close()
+
+	reader := wav.NewReader(file)
+	format, err := reader.Format()
+	if err != nil {
+		log.Fatalf("Failed to read wave format: %v", err)
+	}
+
+	if format.AudioFormat != 1 {
+		log.Fatalf("Support only PCM format. Given: %v\n", format.AudioFormat)
+	}
+
+	if format.NumChannels != 1 {
+		log.Fatalf("Support only 1 channel wave file. Given: %v\n", format.NumChannels)
+	}
+
+	if format.BitsPerSample != 16 {
+		log.Fatalf("Support only 16-bit per sample. Given: %v\n", format.BitsPerSample)
+	}
+
+	reader.Duration() // so that it initializes reader.Size
+
+	buf := make([]byte, reader.Size)
+	n, err := reader.Read(buf)
+	// Success is judged by the byte count: a reader may return an error such
+	// as io.EOF alongside a complete read. The error is included for diagnosis.
+	if n != int(reader.Size) {
+		log.Fatalf("Failed to read %v bytes. Returned %v bytes (error: %v)\n", reader.Size, n, err)
+	}
+
+	samples = samplesInt16ToFloat(buf)
+	sampleRate = int(format.SampleRate)
+
+	return
+}
+
+// samplesInt16ToFloat converts raw little-endian 16-bit PCM bytes into
+// float32 samples by dividing each value by 32768. A trailing odd byte is
+// ignored.
+func samplesInt16ToFloat(inSamples []byte) []float32 {
+	outSamples := make([]float32, len(inSamples)/2)
+
+	for idx := range outSamples {
+		var value int16
+		pair := bytes.NewReader(inSamples[2*idx : 2*idx+2])
+		if err := binary.Read(pair, binary.LittleEndian, &value); err != nil {
+			log.Fatal("Failed to parse 16-bit sample")
+		}
+		outSamples[idx] = float32(value) / 32768
+	}
+
+	return outSamples
+}
--- a/go-api-examples/streaming-hlg-decoding/run.sh 0 → 100755
查看文件 @dbff2ea
+++ b/go-api-examples/streaming-hlg-decoding/run.sh 0 → 100755
查看文件 @dbff2ea
+#!/usr/bin/env bash
+#
+# Downloads the streaming zipformer CTC model if it is not already present,
+# then builds and runs the streaming-hlg-decoding example.
+
+# Exit on the first failing command and echo each command as it runs.
+set -ex
+
+# HLG.fst serves as the marker that the model archive was already extracted.
+if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+fi
+
+# Resolve dependencies, build, and run the binary named after the module.
+go mod tidy
+go build
+ls -lh
+./streaming-hlg-decoding
--- a/nodejs-examples/README.md
查看文件 @dbff2ea
+++ b/nodejs-examples/README.md
查看文件 @dbff2ea
@@ -174,3 +174,16 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp
 tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
 node ./test-online-zipformer2-ctc.js
 ```
+
+## ./test-online-zipformer2-ctc-hlg.js
+[./test-online-zipformer2-ctc-hlg.js](./test-online-zipformer2-ctc-hlg.js) demonstrates
+how to decode a file using a streaming zipformer2 CTC model with HLG. In the code
+we use [sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2).
+
+You can use the following command to run it:
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+node ./test-online-zipformer2-ctc-hlg.js
+```
--- a/nodejs-examples/test-online-paraformer-microphone.js
查看文件 @dbff2ea
+++ b/nodejs-examples/test-online-paraformer-microphone.js
查看文件 @dbff2ea
@@ -50,6 +50,10 @@ function createOnlineRecognizer() {
     rule3MinUtteranceLength: 20,
     hotwordsFile: '',
     hotwordsScore: 1.5,
+    ctcFstDecoderConfig: {
+      graph: '',
+      maxActive: 3000,
+    }
   };
   return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
--- a/nodejs-examples/test-online-paraformer.js
查看文件 @dbff2ea
+++ b/nodejs-examples/test-online-paraformer.js
查看文件 @dbff2ea
@@ -51,6 +51,10 @@ function createOnlineRecognizer() {
     rule3MinUtteranceLength: 20,
     hotwordsFile: '',
     hotwordsScore: 1.5,
+    ctcFstDecoderConfig: {
+      graph: '',
+      maxActive: 3000,
+    }
   };
   return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
--- a/nodejs-examples/test-online-transducer-microphone.js
查看文件 @dbff2ea
+++ b/nodejs-examples/test-online-transducer-microphone.js
查看文件 @dbff2ea
@@ -52,6 +52,10 @@ function createOnlineRecognizer() {
     rule3MinUtteranceLength: 20,
     hotwordsFile: '',
     hotwordsScore: 1.5,
+    ctcFstDecoderConfig: {
+      graph: '',
+      maxActive: 3000,
+    }
   };
   return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
--- a/nodejs-examples/test-online-transducer.js
查看文件 @dbff2ea
+++ b/nodejs-examples/test-online-transducer.js
查看文件 @dbff2ea
@@ -53,6 +53,10 @@ function createOnlineRecognizer() {
     rule3MinUtteranceLength: 20,
     hotwordsFile: '',
     hotwordsScore: 1.5,
+    ctcFstDecoderConfig: {
+      graph: '',
+      maxActive: 3000,
+    }
   };
   return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
--- a/nodejs-examples/test-online-zipformer2-ctc-hlg.js 0 → 100644
查看文件 @dbff2ea
+++ b/nodejs-examples/test-online-zipformer2-ctc-hlg.js 0 → 100644
查看文件 @dbff2ea
+// Copyright (c)  2023  Xiaomi Corporation (authors: Fangjun Kuang)
+//
+const fs = require('fs');
+const {Readable} = require('stream');
+const wav = require('wav');
+
+const sherpa_onnx = require('sherpa-onnx');
+
+function createOnlineRecognizer() {
+  let onlineTransducerModelConfig = {
+    encoder: '',
+    decoder: '',
+    joiner: '',
+  };
+
+  let onlineParaformerModelConfig = {
+    encoder: '',
+    decoder: '',
+  };
+
+  let onlineZipformer2CtcModelConfig = {
+    model:
+        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
+  };
+
+  let onlineModelConfig = {
+    transducer: onlineTransducerModelConfig,
+    paraformer: onlineParaformerModelConfig,
+    zipformer2Ctc: onlineZipformer2CtcModelConfig,
+    tokens: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
+    numThreads: 1,
+    provider: 'cpu',
+    debug: 0,
+    modelType: '',
+  };
+
+  let featureConfig = {
+    sampleRate: 16000,
+    featureDim: 80,
+  };
+
+  let recognizerConfig = {
+    featConfig: featureConfig,
+    modelConfig: onlineModelConfig,
+    decodingMethod: 'greedy_search',
+    maxActivePaths: 4,
+    enableEndpoint: 1,
+    rule1MinTrailingSilence: 2.4,
+    rule2MinTrailingSilence: 1.2,
+    rule3MinUtteranceLength: 20,
+    hotwordsFile: '',
+    hotwordsScore: 1.5,
+    ctcFstDecoderConfig: {
+      graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
+      maxActive: 3000,
+    }
+  };
+
+  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
+}
+
+const recognizer = createOnlineRecognizer();
+const stream = recognizer.createStream();
+
+const waveFilename =
+    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav';
+
+const reader = new wav.Reader();
+const readable = new Readable().wrap(reader);
+
+// Feeds one chunk of samples to the stream, decodes everything that is
+// ready, and logs the current recognition result.
+// Relies on the module-level `stream`, `recognizer`, and `gSampleRate`.
+function decode(samples) {
+  stream.acceptWaveform(gSampleRate, samples);
+
+  // Keep decoding until the recognizer has nothing more ready for this stream.
+  while (recognizer.isReady(stream)) {
+    recognizer.decode(stream);
+  }
+  // NOTE(review): presumably getResult() returns the text recognized so far;
+  // confirm against the sherpa-onnx package.
+  const text = recognizer.getResult(stream);
+  console.log(text);
+}
+
+let gSampleRate = 16000;
+
+reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {
+  gSampleRate = sampleRate;
+
+  if (audioFormat != 1) {
+    throw new Error(`Only support PCM format. Given ${audioFormat}`);
+  }
+
+  if (channels != 1) {
+    throw new Error(`Only a single channel. Given ${channel}`);
+  }
+
+  if (bitDepth != 16) {
+    throw new Error(`Only support 16-bit samples. Given ${bitDepth}`);
+  }
+});
+
+// Pipe the wave file into the wav reader. When the pipe finishes, decode a
+// block of zero samples as tail padding and release the native objects.
+fs.createReadStream(waveFilename, {'highWaterMark': 4096})
+    .pipe(reader)
+    .on('finish', function(err) {
+      // tail padding
+      // NOTE(review): the padding length is derived from the configured
+      // feature sample rate, while decode() feeds it at gSampleRate (the
+      // file's rate); confirm this is intended when the two differ.
+      const floatSamples =
+          new Float32Array(recognizer.config.featConfig.sampleRate * 0.5);
+      decode(floatSamples);
+      stream.free();
+      recognizer.free();
+    });
+
+// Convert every chunk of 16-bit PCM data to floats and decode it.
+readable.on('readable', function() {
+  let chunk;
+  while ((chunk = readable.read()) != null) {
+    // View the chunk's bytes as int16 samples without copying.
+    const int16Samples = new Int16Array(
+        chunk.buffer, chunk.byteOffset,
+        chunk.length / Int16Array.BYTES_PER_ELEMENT);
+
+    const floatSamples = new Float32Array(int16Samples.length);
+
+    // Scale int16 values to the range [-1, 1).
+    for (let i = 0; i < floatSamples.length; i++) {
+      floatSamples[i] = int16Samples[i] / 32768.0;
+    }
+
+    decode(floatSamples);
+  }
+});
--- a/nodejs-examples/test-online-zipformer2-ctc.js
查看文件 @dbff2ea
+++ b/nodejs-examples/test-online-zipformer2-ctc.js
查看文件 @dbff2ea
@@ -51,6 +51,10 @@ function createOnlineRecognizer() {
     rule3MinUtteranceLength: 20,
     hotwordsFile: '',
     hotwordsScore: 1.5,
+    ctcFstDecoderConfig: {
+      graph: '',
+      maxActive: 3000,
+    }
   };
   return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
--- a/scripts/dotnet/examples/streaming-hlg-decoding.csproj 0 → 100644
查看文件 @dbff2ea
+++ b/scripts/dotnet/examples/streaming-hlg-decoding.csproj 0 → 100644
查看文件 @dbff2ea
+<Project Sdk="Microsoft.NET.Sdk">
+
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <TargetFramework>net6.0</TargetFramework>
+    <RootNamespace>streaming_hlg_decoding</RootNamespace>
+    <ImplicitUsings>enable</ImplicitUsings>
+    <Nullable>enable</Nullable>
+  </PropertyGroup>
+
+  <PropertyGroup>
+    <RestoreSources>/tmp/packages;$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources>
+  </PropertyGroup>
+
+  <ItemGroup>
+    <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" />
+  </ItemGroup>
+
+</Project>
--- a/scripts/dotnet/online.cs
查看文件 @dbff2ea
+++ b/scripts/dotnet/online.cs
查看文件 @dbff2ea
@@ -117,6 +117,21 @@ namespace SherpaOnnx
   }
   [StructLayout(LayoutKind.Sequential)]
+  /// Configuration for the CTC FST decoder of the online recognizer.
+  /// Laid out sequentially so that it can be marshalled to native code.
+  public struct OnlineCtcFstDecoderConfig
+  {
+    /// Initializes Graph to an empty string and MaxActive to 3000.
+    public OnlineCtcFstDecoderConfig()
+    {
+      Graph = "";
+      MaxActive = 3000;
+    }
+
+    /// Path to the decoding graph file, e.g., HLG.fst.
+    [MarshalAs(UnmanagedType.LPStr)]
+    public string Graph;
+
+    /// NOTE(review): presumably the limit on active states kept during
+    /// FST decoding; confirm against the native implementation.
+    public int MaxActive;
+  }
+
+  [StructLayout(LayoutKind.Sequential)]
   public struct OnlineRecognizerConfig
   {
     public OnlineRecognizerConfig()
@@ -131,6 +146,7 @@ namespace SherpaOnnx
       Rule3MinUtteranceLength = 20.0F;
       HotwordsFile = "";
       HotwordsScore = 1.5F;
+      CtcFstDecoderConfig = new OnlineCtcFstDecoderConfig();
     }
     public FeatureConfig FeatConfig;
     public OnlineModelConfig ModelConfig;
@@ -167,6 +183,8 @@ namespace SherpaOnnx
     /// Bonus score for each token in hotwords.
     public float HotwordsScore;
+
+    public OnlineCtcFstDecoderConfig CtcFstDecoderConfig;
   }
   public class OnlineRecognizerResult
--- a/scripts/go/_internal/streaming-hlg-decoding/.gitignore 0 → 100644
查看文件 @dbff2ea
+++ b/scripts/go/_internal/streaming-hlg-decoding/.gitignore 0 → 100644
查看文件 @dbff2ea
+streaming-hlg-decoding
--- a/scripts/go/_internal/streaming-hlg-decoding/go.mod 0 → 100644
查看文件 @dbff2ea
+++ b/scripts/go/_internal/streaming-hlg-decoding/go.mod 0 → 100644
查看文件 @dbff2ea
+module streaming-hlg-decoding
+
+go 1.12
+
+replace github.com/k2-fsa/sherpa-onnx-go/sherpa_onnx => ../
--- a/scripts/go/_internal/streaming-hlg-decoding/main.go 0 → 120000
查看文件 @dbff2ea
+++ b/scripts/go/_internal/streaming-hlg-decoding/main.go 0 → 120000
查看文件 @dbff2ea
+../../../../go-api-examples/streaming-hlg-decoding/main.go
--- a/scripts/go/_internal/streaming-hlg-decoding/run.sh 0 → 120000
查看文件 @dbff2ea
+++ b/scripts/go/_internal/streaming-hlg-decoding/run.sh 0 → 120000
查看文件 @dbff2ea
+../../../../go-api-examples/streaming-hlg-decoding/run.sh
--- a/scripts/go/sherpa_onnx.go
查看文件 @dbff2ea
+++ b/scripts/go/sherpa_onnx.go
查看文件 @dbff2ea
@@ -99,6 +99,11 @@ type FeatureConfig struct {
 	FeatureDim int
 }
+// Configuration for the CTC FST decoder of the online recognizer.
+// NewOnlineRecognizer copies both fields into the C config's
+// ctc_fst_decoder_config.
+type OnlineCtcFstDecoderConfig struct {
+	Graph     string // Path to the decoding graph file, e.g., HLG.fst
+	MaxActive int    // NOTE(review): presumably the limit on active states during decoding; confirm
+}
+
 // Configuration for the online/streaming recognizer.
 type OnlineRecognizerConfig struct {
 	FeatConfig  FeatureConfig
@@ -120,6 +125,7 @@ type OnlineRecognizerConfig struct {
 	Rule1MinTrailingSilence float32
 	Rule2MinTrailingSilence float32
 	Rule3MinUtteranceLength float32
+	CtcFstDecoderConfig     OnlineCtcFstDecoderConfig
 }
 // It contains the recognition result for a online stream.
@@ -190,6 +196,10 @@ func NewOnlineRecognizer(config *OnlineRecognizerConfig) *OnlineRecognizer {
 	c.rule2_min_trailing_silence = C.float(config.Rule2MinTrailingSilence)
 	c.rule3_min_utterance_length = C.float(config.Rule3MinUtteranceLength)
+	c.ctc_fst_decoder_config.graph = C.CString(config.CtcFstDecoderConfig.Graph)
+	defer C.free(unsafe.Pointer(c.ctc_fst_decoder_config.graph))
+	c.ctc_fst_decoder_config.max_active = C.int(config.CtcFstDecoderConfig.MaxActive)
+
 	recognizer := &OnlineRecognizer{}
 	recognizer.impl = C.CreateOnlineRecognizer(&c)
--- a/sherpa-onnx/c-api/c-api.cc
查看文件 @dbff2ea
+++ b/sherpa-onnx/c-api/c-api.cc
查看文件 @dbff2ea
@@ -99,6 +99,11 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
   recognizer_config.hotwords_score =
       SHERPA_ONNX_OR(config->hotwords_score, 1.5);
+  recognizer_config.ctc_fst_decoder_config.graph =
+      SHERPA_ONNX_OR(config->ctc_fst_decoder_config.graph, "");
+  recognizer_config.ctc_fst_decoder_config.max_active =
+      SHERPA_ONNX_OR(config->ctc_fst_decoder_config.max_active, 3000);
+
   if (config->model_config.debug) {
     SHERPA_ONNX_LOGE("%s\n", recognizer_config.ToString().c_str());
   }
--- a/sherpa-onnx/c-api/c-api.h
查看文件 @dbff2ea
+++ b/sherpa-onnx/c-api/c-api.h
查看文件 @dbff2ea
@@ -96,6 +96,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
   int32_t feature_dim;
 } SherpaOnnxFeatureConfig;
+/// Configuration for decoding a streaming CTC model with an FST graph.
+/// It is embedded in SherpaOnnxOnlineRecognizerConfig as
+/// `ctc_fst_decoder_config`.
+SHERPA_ONNX_API typedef struct SherpaOnnxOnlineCtcFstDecoderConfig {
+  /// Path to the decoding graph, e.g. an HLG.fst file.
+  /// CreateOnlineRecognizer() uses "" as the fallback value.
+  const char *graph;
+  /// CreateOnlineRecognizer() uses 3000 as the fallback value.
+  int32_t max_active;
+} SherpaOnnxOnlineCtcFstDecoderConfig;
+
 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
   SherpaOnnxFeatureConfig feat_config;
   SherpaOnnxOnlineModelConfig model_config;
@@ -131,6 +136,8 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
   /// Bonus score for each token in hotwords.
   float hotwords_score;
+
+  SherpaOnnxOnlineCtcFstDecoderConfig ctc_fst_decoder_config;
 } SherpaOnnxOnlineRecognizerConfig;
 SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult {
--- a/swift-api-examples/.gitignore
查看文件 @dbff2ea
+++ b/swift-api-examples/.gitignore
查看文件 @dbff2ea
@@ -7,3 +7,4 @@ vits-vctk
 sherpa-onnx-paraformer-zh-2023-09-14
 !*.sh
 *.bak
+streaming-hlg-decode-file
--- a/swift-api-examples/SherpaOnnx.swift
查看文件 @dbff2ea
+++ b/swift-api-examples/SherpaOnnx.swift
查看文件 @dbff2ea
@@ -111,6 +111,15 @@ func sherpaOnnxFeatureConfig(
     feature_dim: Int32(featureDim))
 }
+/// Build a `SherpaOnnxOnlineCtcFstDecoderConfig` C struct.
+///
+/// - Parameters:
+///   - graph: Path to the decoding graph, converted to a C string with
+///     `toCPointer`. Defaults to an empty string.
+///   - maxActive: Stored in `max_active` as an `Int32`. Defaults to 3000.
+/// - Returns: The populated C struct.
+func sherpaOnnxOnlineCtcFstDecoderConfig(
+  graph: String = "",
+  maxActive: Int = 3000
+) -> SherpaOnnxOnlineCtcFstDecoderConfig {
+  return SherpaOnnxOnlineCtcFstDecoderConfig(
+    graph: toCPointer(graph),
+    max_active: Int32(maxActive))
+}
+
 func sherpaOnnxOnlineRecognizerConfig(
   featConfig: SherpaOnnxFeatureConfig,
   modelConfig: SherpaOnnxOnlineModelConfig,
@@ -121,7 +130,8 @@ func sherpaOnnxOnlineRecognizerConfig(
   decodingMethod: String = "greedy_search",
   maxActivePaths: Int = 4,
   hotwordsFile: String = "",
-  hotwordsScore: Float = 1.5
+  hotwordsScore: Float = 1.5,
+  ctcFstDecoderConfig: SherpaOnnxOnlineCtcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig()
 ) -> SherpaOnnxOnlineRecognizerConfig {
   return SherpaOnnxOnlineRecognizerConfig(
     feat_config: featConfig,
@@ -133,7 +143,9 @@ func sherpaOnnxOnlineRecognizerConfig(
     rule2_min_trailing_silence: rule2MinTrailingSilence,
     rule3_min_utterance_length: rule3MinUtteranceLength,
     hotwords_file: toCPointer(hotwordsFile),
-    hotwords_score: hotwordsScore)
+    hotwords_score: hotwordsScore,
+    ctc_fst_decoder_config: ctcFstDecoderConfig
+  )
 }
 /// Wrapper for recognition result.
--- a/swift-api-examples/run-streaming-hlg-decode-file.sh 0 → 100755
查看文件 @dbff2ea
+++ b/swift-api-examples/run-streaming-hlg-decode-file.sh 0 → 100755
查看文件 @dbff2ea
+#!/usr/bin/env bash
+#
+# Build (if necessary) and run the Swift streaming HLG decoding example.
+#
+# Steps:
+#   1. Require that ../build-swift-macos exists.
+#   2. Download the test model unless its HLG.fst is already present.
+#   3. Compile ./streaming-hlg-decode-file unless it already exists.
+#   4. Run the resulting binary.
+
+# -e: stop at the first failing command; -x: echo each command.
+set -ex
+
+# The compile step below needs headers and libraries from this directory.
+if [ ! -d ../build-swift-macos ]; then
+  echo "Please run ../build-swift-macos.sh first!"
+  exit 1
+fi
+
+# HLG.fst is used as the marker that the model archive was already extracted.
+if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst ]; then
+  echo "Downloading the pre-trained model for testing."
+
+  wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+fi
+
+# Build only when the executable is missing; delete it to force a rebuild.
+if [ ! -e ./streaming-hlg-decode-file ]; then
+  # Note: We use -lc++ to link against libc++ instead of libstdc++
+  swiftc \
+    -lc++ \
+    -I ../build-swift-macos/install/include \
+    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+    ./streaming-hlg-decode-file.swift  ./SherpaOnnx.swift \
+    -L ../build-swift-macos/install/lib/ \
+    -l sherpa-onnx \
+    -l onnxruntime \
+    -o streaming-hlg-decode-file
+
+  strip ./streaming-hlg-decode-file
+else
+  echo "./streaming-hlg-decode-file exists - skip building"
+fi
+
+# Put the install lib directory first on the dynamic loader search path.
+export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH
+./streaming-hlg-decode-file
--- a/swift-api-examples/streaming-hlg-decode-file.swift 0 → 100644
查看文件 @dbff2ea
+++ b/swift-api-examples/streaming-hlg-decode-file.swift 0 → 100644
查看文件 @dbff2ea
+import AVFoundation
+
+extension AudioBuffer {
+  /// Copy the contents of this buffer into a Swift array, viewing the data
+  /// as `Float` values.
+  func array() -> [Float] {
+    return Array(UnsafeBufferPointer(self))
+  }
+}
+
+extension AVAudioPCMBuffer {
+  /// Return the samples held in the `mBuffers` entry of this PCM buffer's
+  /// audio buffer list as a `[Float]`.
+  func array() -> [Float] {
+    return self.audioBufferList.pointee.mBuffers.array()
+  }
+}
+
+/// Decode one test wave file with a streaming zipformer2 CTC model and an
+/// HLG graph, then print the recognized text.
+///
+/// All paths are relative to the current working directory and point into
+/// `./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18`.
+func run() {
+  let filePath =
+    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/8k.wav"
+  let model =
+    "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx"
+  let tokens = "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt"
+  let zipfomer2CtcModelConfig = sherpaOnnxOnlineZipformer2CtcModelConfig(
+    model: model
+  )
+
+  let modelConfig = sherpaOnnxOnlineModelConfig(
+    tokens: tokens,
+    zipformer2Ctc: zipfomer2CtcModelConfig
+  )
+
+  let featConfig = sherpaOnnxFeatureConfig(
+    sampleRate: 16000,
+    featureDim: 80
+  )
+
+  // Point the CTC FST decoder at the HLG graph shipped with the model.
+  let ctcFstDecoderConfig = sherpaOnnxOnlineCtcFstDecoderConfig(
+    graph: "./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst",
+    maxActive: 3000
+  )
+
+  // Every recognizer option not listed here keeps its default value.
+  var config = sherpaOnnxOnlineRecognizerConfig(
+    featConfig: featConfig,
+    modelConfig: modelConfig,
+    ctcFstDecoderConfig: ctcFstDecoderConfig
+  )
+
+  let recognizer = SherpaOnnxRecognizer(config: &config)
+
+  // `try!` aborts the program if the file cannot be opened.
+  let fileURL: NSURL = NSURL(fileURLWithPath: filePath)
+  let audioFile = try! AVAudioFile(forReading: fileURL as URL)
+
+  // The example only handles mono Float32 audio.
+  let audioFormat = audioFile.processingFormat
+  assert(audioFormat.channelCount == 1)
+  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)
+
+  // Read the whole file into one buffer sized to the file's frame count.
+  let audioFrameCount = UInt32(audioFile.length)
+  let audioFileBuffer = AVAudioPCMBuffer(pcmFormat: audioFormat, frameCapacity: audioFrameCount)
+
+  try! audioFile.read(into: audioFileBuffer!)
+  let array: [Float]! = audioFileBuffer?.array()
+  // The file's own sample rate is passed along with the samples.
+  recognizer.acceptWaveform(samples: array, sampleRate: Int(audioFormat.sampleRate))
+
+  // Append 3200 zero samples after the real audio.
+  // NOTE(review): presumably this lets the model emit its final frames; confirm.
+  let tailPadding = [Float](repeating: 0.0, count: 3200)
+  recognizer.acceptWaveform(samples: tailPadding, sampleRate: Int(audioFormat.sampleRate))
+
+  // Signal end of input, then decode until the recognizer is no longer ready.
+  recognizer.inputFinished()
+  while recognizer.isReady() {
+    recognizer.decode()
+  }
+
+  let result = recognizer.getResult()
+  print("\nresult is:\n\(result.text)")
+}
+
+/// Program entry point; it only calls `run()`.
+@main
+struct App {
+  static func main() {
+    run()
+  }
+}
--- a/wasm/asr/sherpa-onnx-asr.js
查看文件 @dbff2ea
+++ b/wasm/asr/sherpa-onnx-asr.js
查看文件 @dbff2ea
@@ -43,6 +43,10 @@ function freeConfig(config, Module) {
     freeConfig(config.lm, Module)
   }
+  if ('ctcFstDecoder' in config) {
+    freeConfig(config.ctcFstDecoder, Module)
+  }
+
   Module._free(config.ptr);
 }
@@ -193,11 +197,26 @@ function initSherpaOnnxFeatureConfig(config, Module) {
   return {ptr: ptr, len: len};
 }
+/**
+ * Allocate a SherpaOnnxOnlineCtcFstDecoderConfig on the wasm heap.
+ *
+ * Layout (2 * 4 bytes): a 4-byte `graph` string pointer followed by a
+ * 4-byte `max_active` integer.
+ *
+ * @param config Optional object with `graph` (string) and `maxActive`
+ *     (number). A missing object or missing field falls back to an empty
+ *     graph and 3000, the same defaults createOnlineRecognizer() uses.
+ * @param Module The Emscripten module that owns the heap.
+ * @returns An object with `ptr` (the struct), `len` (its size in bytes) and
+ *     `buffer` (the heap copy of the graph string). Both allocations come
+ *     from Module._malloc and must be released by the caller.
+ */
+function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
+  // A user-supplied recognizer config replaces the defaults wholesale, so
+  // `ctcFstDecoderConfig` may be absent; do not dereference undefined.
+  const graph = (config && config.graph) || '';
+  const maxActive = (config && config.maxActive) || 3000;
+
+  const len = 2 * 4;
+  const ptr = Module._malloc(len);
+
+  // +1 for the terminating NUL written by stringToUTF8.
+  const graphLen = Module.lengthBytesUTF8(graph) + 1;
+  const buffer = Module._malloc(graphLen);
+  Module.stringToUTF8(graph, buffer, graphLen);
+
+  Module.setValue(ptr, buffer, 'i8*');
+  Module.setValue(ptr + 4, maxActive, 'i32');
+  return {ptr: ptr, len: len, buffer: buffer};
+}
+
 function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
   const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
   const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
+  const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
+      config.ctcFstDecoderConfig, Module)
-  const len = feat.len + model.len + 8 * 4;
+  const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len;
   const ptr = Module._malloc(len);
   let offset = 0;
@@ -243,8 +262,11 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
   Module.setValue(ptr + offset, config.hotwordsScore, 'float');
   offset += 4;
+  Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
+
   return {
-    buffer: buffer, ptr: ptr, len: len, feat: feat, model: model
+    buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
+        ctcFstDecoder: ctcFstDecoder
   }
 }
@@ -313,6 +335,10 @@ function createOnlineRecognizer(Module, myConfig) {
     rule3MinUtteranceLength: 20,
     hotwordsFile: '',
     hotwordsScore: 1.5,
+    ctcFstDecoderConfig: {
+      graph: '',
+      maxActive: 3000,
+    }
   };
   if (myConfig) {
     recognizerConfig = myConfig;
--- a/wasm/asr/sherpa-onnx-wasm-main-asr.cc
查看文件 @dbff2ea
+++ b/wasm/asr/sherpa-onnx-wasm-main-asr.cc
查看文件 @dbff2ea
@@ -22,9 +22,11 @@ static_assert(sizeof(SherpaOnnxOnlineModelConfig) ==
                       sizeof(SherpaOnnxOnlineZipformer2CtcModelConfig) + 5 * 4,
               "");
 static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
+static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
 static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
                   sizeof(SherpaOnnxFeatureConfig) +
-                      sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4,
+                      sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
+                      sizeof(SherpaOnnxOnlineCtcFstDecoderConfig),
               "");
 void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
@@ -67,6 +69,11 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
           config->rule3_min_utterance_length);
   fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
   fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
+
+  fprintf(stdout, "----------ctc fst decoder config----------\n");
+  fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
+  fprintf(stdout, "max_active: %d\n",
+          config->ctc_fst_decoder_config.max_active);
 }
 void CopyHeap(const char *src, int32_t num_bytes, char *dst) {