Committed by
GitHub
Refactor C# code and support building nuget packages for cross-platforms (#144)
正在显示 39 个修改的文件，包含 2041 行增加和 2302 行删除
.github/workflows/dot-net.yaml
0 → 100644
| 1 | +name: dot-net | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - dot-net | ||
| 7 | + tags: | ||
| 8 | + - '*' | ||
| 9 | + | ||
| 10 | +concurrency: | ||
| 11 | + group: dot-net-${{ github.ref }} | ||
| 12 | + cancel-in-progress: true | ||
| 13 | + | ||
| 14 | +jobs: | ||
| 15 | + build-libs: | ||
| 16 | + name: dot-net for ${{ matrix.os }} | ||
| 17 | + runs-on: ${{ matrix.os }} | ||
| 18 | + strategy: | ||
| 19 | + fail-fast: false | ||
| 20 | + matrix: | ||
| 21 | + os: [ubuntu-latest, windows-latest, macos-latest] | ||
| 22 | + | ||
| 23 | + steps: | ||
| 24 | + - uses: actions/checkout@v2 | ||
| 25 | + # see https://cibuildwheel.readthedocs.io/en/stable/changelog/ | ||
| 26 | + # for a list of versions | ||
| 27 | + - name: Build wheels | ||
| 28 | + uses: pypa/cibuildwheel@v2.11.4 | ||
| 29 | + env: | ||
| 30 | + CIBW_BEFORE_BUILD: "pip install -U cmake numpy" | ||
| 31 | + CIBW_BUILD: "cp38-*64" | ||
| 32 | + CIBW_SKIP: "cp27-* cp35-* cp36-* *-win32 pp* *-musllinux* *-manylinux_i686" | ||
| 33 | + CIBW_BUILD_VERBOSITY: 3 | ||
| 34 | + CIBW_ENVIRONMENT_LINUX: LD_LIBRARY_PATH='/project/build/bdist.linux-x86_64/wheel/sherpa_onnx/lib' | ||
| 35 | + CIBW_REPAIR_WHEEL_COMMAND_MACOS: "" | ||
| 36 | + | ||
| 37 | + - name: Display wheels | ||
| 38 | + shell: bash | ||
| 39 | + run: | | ||
| 40 | + ls -lh ./wheelhouse/*.whl | ||
| 41 | + unzip -l ./wheelhouse/*.whl | ||
| 42 | + | ||
| 43 | + - uses: actions/upload-artifact@v2 | ||
| 44 | + with: | ||
| 45 | + name: ${{ matrix.os }}-wheels | ||
| 46 | + path: ./wheelhouse/*.whl | ||
| 47 | + | ||
| 48 | + build-nuget-packages: | ||
| 49 | + name: build-nuget-packages | ||
| 50 | + runs-on: ubuntu-latest | ||
| 51 | + needs: build-libs | ||
| 52 | + | ||
| 53 | + steps: | ||
| 54 | + - uses: actions/checkout@v2 | ||
| 55 | + | ||
| 56 | + - name: Retrieve artifact from ubuntu-latest | ||
| 57 | + uses: actions/download-artifact@v2 | ||
| 58 | + with: | ||
| 59 | + name: ubuntu-latest-wheels | ||
| 60 | + path: ./linux | ||
| 61 | + | ||
| 62 | + - name: Retrieve artifact from macos-latest | ||
| 63 | + uses: actions/download-artifact@v2 | ||
| 64 | + with: | ||
| 65 | + name: macos-latest-wheels | ||
| 66 | + path: ./macos | ||
| 67 | + | ||
| 68 | + - name: Retrieve artifact from windows-latest | ||
| 69 | + uses: actions/download-artifact@v2 | ||
| 70 | + with: | ||
| 71 | + name: windows-latest-wheels | ||
| 72 | + path: ./windows | ||
| 73 | + | ||
| 74 | + - name: Display wheels | ||
| 75 | + shell: bash | ||
| 76 | + run: | | ||
| 77 | + tree . | ||
| 78 | + | ||
| 79 | + - name: Unzip Ubuntu wheels | ||
| 80 | + shell: bash | ||
| 81 | + run: | | ||
| 82 | + cd linux | ||
| 83 | + unzip ./*.whl | ||
| 84 | + tree . | ||
| 85 | + | ||
| 86 | + - name: Unzip macOS wheels | ||
| 87 | + shell: bash | ||
| 88 | + run: | | ||
| 89 | + cd macos | ||
| 90 | + unzip ./*.whl | ||
| 91 | + tree . | ||
| 92 | + | ||
| 93 | + - name: Unzip Windows wheels | ||
| 94 | + shell: bash | ||
| 95 | + run: | | ||
| 96 | + cd windows | ||
| 97 | + unzip ./*.whl | ||
| 98 | + cp -v ./*.dll sherpa_onnx/lib/ | ||
| 99 | + tree . | ||
| 100 | + | ||
| 101 | + - name: Setup .NET Core 3.1 | ||
| 102 | + uses: actions/setup-dotnet@v1 | ||
| 103 | + with: | ||
| 104 | + dotnet-version: 3.1.x | ||
| 105 | + | ||
| 106 | + - name: Setup .NET 7.0 | ||
| 107 | + uses: actions/setup-dotnet@v1 | ||
| 108 | + with: | ||
| 109 | + dotnet-version: 7.0.x | ||
| 110 | + | ||
| 111 | + - name: Check dotnet | ||
| 112 | + run: dotnet --info | ||
| 113 | + | ||
| 114 | + - name: build nuget packages | ||
| 115 | + shell: bash | ||
| 116 | + run: | | ||
| 117 | + cd scripts/dotnet | ||
| 118 | + ./run.sh | ||
| 119 | + ls -lh packages | ||
| 120 | + | ||
| 121 | + - uses: actions/upload-artifact@v2 | ||
| 122 | + name: upload nuget packages | ||
| 123 | + with: | ||
| 124 | + name: nuget-packages | ||
| 125 | + path: scripts/dotnet/packages/*.nupkg | ||
| 126 | + | ||
| 127 | + - name: publish .Net packages to nuget.org | ||
| 128 | + if: github.repository == 'csukuangfj/sherpa-onnx' || github.repository == 'k2-fsa/sherpa-onnx' | ||
| 129 | + shell: bash | ||
| 130 | + env: | ||
| 131 | + API_KEY: ${{ secrets.NUGET_API_KEY }} | ||
| 132 | + run: | | ||
| 133 | + # API_KEY is valid until 2024.05.02 | ||
| 134 | + cd scripts/dotnet/packages | ||
| 135 | + dotnet nuget push ./org.k2fsa.sherpa.onnx.*.nupkg --skip-duplicate --api-key $API_KEY --source https://api.nuget.org/v3/index.json |
.github/workflows/test-dot-net.yaml
0 → 100644
| 1 | +name: test-dot-net | ||
| 2 | + | ||
| 3 | +on: | ||
| 4 | + push: | ||
| 5 | + branches: | ||
| 6 | + - master | ||
| 7 | + paths: | ||
| 8 | + - '.github/workflows/test-dot-net.yaml' | ||
| 9 | + - 'dotnet-examples/**' | ||
| 10 | + | ||
| 11 | + pull_request: | ||
| 12 | + branches: | ||
| 13 | + - master | ||
| 14 | + paths: | ||
| 15 | + - '.github/workflows/test-dot-net.yaml' | ||
| 16 | + - 'dotnet-examples/**' | ||
| 17 | + | ||
| 18 | + schedule: | ||
| 19 | + # minute (0-59) | ||
| 20 | + # hour (0-23) | ||
| 21 | + # day of the month (1-31) | ||
| 22 | + # month (1-12) | ||
| 23 | + # day of the week (0-6) | ||
| 24 | + # nightly build at 23:50 UTC every day | ||
| 25 | + - cron: "50 23 * * *" | ||
| 26 | + | ||
| 27 | +concurrency: | ||
| 28 | + group: test-dot-net | ||
| 29 | + cancel-in-progress: true | ||
| 30 | + | ||
| 31 | +permissions: | ||
| 32 | + contents: read | ||
| 33 | + | ||
| 34 | +jobs: | ||
| 35 | + test-dot-net: | ||
| 36 | + runs-on: ${{ matrix.os }} | ||
| 37 | + strategy: | ||
| 38 | + fail-fast: false | ||
| 39 | + matrix: | ||
| 40 | + os: [ubuntu-latest, macos-latest, windows-latest] | ||
| 41 | + | ||
| 42 | + steps: | ||
| 43 | + - uses: actions/checkout@v2 | ||
| 44 | + with: | ||
| 45 | + fetch-depth: 0 | ||
| 46 | + | ||
| 47 | + - name: Setup .NET Core 3.1 | ||
| 48 | + uses: actions/setup-dotnet@v1 | ||
| 49 | + with: | ||
| 50 | + dotnet-version: 3.1.x | ||
| 51 | + | ||
| 52 | + - name: Setup .NET 6.0 | ||
| 53 | + uses: actions/setup-dotnet@v1 | ||
| 54 | + with: | ||
| 55 | + dotnet-version: 6.0.x | ||
| 56 | + | ||
| 57 | + - name: Check dotnet | ||
| 58 | + run: dotnet --info | ||
| 59 | + | ||
| 60 | + - name: Decode a file | ||
| 61 | + shell: bash | ||
| 62 | + run: | | ||
| 63 | + cd dotnet-examples/ | ||
| 64 | + cd online-decode-files | ||
| 65 | + ./run.sh | ||
| 66 | + | ||
| 67 | + cd ../offline-decode-files | ||
| 68 | + ./run-nemo-ctc.sh | ||
| 69 | + ./run-paraformer.sh | ||
| 70 | + ./run-zipformer.sh |
| 1 | cmake_minimum_required(VERSION 3.13 FATAL_ERROR) | 1 | cmake_minimum_required(VERSION 3.13 FATAL_ERROR) |
| 2 | project(sherpa-onnx) | 2 | project(sherpa-onnx) |
| 3 | 3 | ||
| 4 | -set(SHERPA_ONNX_VERSION "1.4.1") | 4 | +set(SHERPA_ONNX_VERSION "1.4.2") |
| 5 | 5 | ||
| 6 | # Disable warning about | 6 | # Disable warning about |
| 7 | # | 7 | # |
| @@ -37,16 +37,12 @@ endif() | @@ -37,16 +37,12 @@ endif() | ||
| 37 | set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) | 37 | set(CMAKE_INSTALL_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) |
| 38 | set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) | 38 | set(CMAKE_BUILD_RPATH ${SHERPA_ONNX_RPATH_ORIGIN}) |
| 39 | 39 | ||
| 40 | -if(BUILD_SHARED_LIBS AND MSVC) | ||
| 41 | - set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) | ||
| 42 | -endif() | ||
| 43 | - | ||
| 44 | if(NOT CMAKE_BUILD_TYPE) | 40 | if(NOT CMAKE_BUILD_TYPE) |
| 45 | message(STATUS "No CMAKE_BUILD_TYPE given, default to Release") | 41 | message(STATUS "No CMAKE_BUILD_TYPE given, default to Release") |
| 46 | set(CMAKE_BUILD_TYPE Release) | 42 | set(CMAKE_BUILD_TYPE Release) |
| 47 | endif() | 43 | endif() |
| 48 | 44 | ||
| 49 | -if(DEFINED ANDROID_ABI) | 45 | +if(DEFINED ANDROID_ABI AND NOT SHERPA_ONNX_ENABLE_JNI) |
| 50 | message(STATUS "Set SHERPA_ONNX_ENABLE_JNI to ON for Android") | 46 | message(STATUS "Set SHERPA_ONNX_ENABLE_JNI to ON for Android") |
| 51 | set(SHERPA_ONNX_ENABLE_JNI ON CACHE BOOL "" FORCE) | 47 | set(SHERPA_ONNX_ENABLE_JNI ON CACHE BOOL "" FORCE) |
| 52 | endif() | 48 | endif() |
| @@ -61,6 +57,10 @@ if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS) | @@ -61,6 +57,10 @@ if(SHERPA_ONNX_ENABLE_JNI AND NOT BUILD_SHARED_LIBS) | ||
| 61 | set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) | 57 | set(BUILD_SHARED_LIBS ON CACHE BOOL "" FORCE) |
| 62 | endif() | 58 | endif() |
| 63 | 59 | ||
| 60 | +if(BUILD_SHARED_LIBS AND MSVC) | ||
| 61 | + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) | ||
| 62 | +endif() | ||
| 63 | + | ||
| 64 | message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") | 64 | message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") |
| 65 | message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") | 65 | message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") |
| 66 | message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") | 66 | message(STATUS "BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS}") |
| @@ -41,7 +41,6 @@ try: | @@ -41,7 +41,6 @@ try: | ||
| 41 | # -linux_x86_64.whl | 41 | # -linux_x86_64.whl |
| 42 | self.root_is_pure = False | 42 | self.root_is_pure = False |
| 43 | 43 | ||
| 44 | - | ||
| 45 | except ImportError: | 44 | except ImportError: |
| 46 | bdist_wheel = None | 45 | bdist_wheel = None |
| 47 | 46 | ||
| @@ -78,7 +77,6 @@ class BuildExtension(build_ext): | @@ -78,7 +77,6 @@ class BuildExtension(build_ext): | ||
| 78 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_CHECK=OFF " | 77 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_CHECK=OFF " |
| 79 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PYTHON=ON " | 78 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PYTHON=ON " |
| 80 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PORTAUDIO=ON " | 79 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_PORTAUDIO=ON " |
| 81 | - extra_cmake_args += " -DSHERPA_ONNX_ENABLE_C_API=OFF " | ||
| 82 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON " | 80 | extra_cmake_args += " -DSHERPA_ONNX_ENABLE_WEBSOCKET=ON " |
| 83 | 81 | ||
| 84 | if "PYTHON_EXECUTABLE" not in cmake_args: | 82 | if "PYTHON_EXECUTABLE" not in cmake_args: |
| 1 | -// See https://aka.ms/new-console-template for more information | ||
| 2 | -// Copyright (c) 2023 by manyeyes | ||
| 3 | -using SherpaOnnx; | ||
| 4 | -/// Please refer to | ||
| 5 | -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 6 | -/// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 7 | -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 8 | -/// from there. | ||
| 9 | - | ||
| 10 | -/// download model eg: | ||
| 11 | -/// (The directory where the application runs) | ||
| 12 | -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory | ||
| 13 | -/// cd /path/to | ||
| 14 | -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01 | ||
| 15 | -/// git clone https://huggingface.co/csukuangfj/paraformer-onnxruntime-python-example | ||
| 16 | -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-citrinet-512 | ||
| 17 | - | ||
| 18 | -/// NuGet for sherpa-onnx | ||
| 19 | -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx | ||
| 20 | -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx | ||
| 21 | - | ||
| 22 | -// transducer Usage: | ||
| 23 | -/* | ||
| 24 | - .\SherpaOnnx.Examples.exe ` | ||
| 25 | - --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt ` | ||
| 26 | - --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx ` | ||
| 27 | - --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx ` | ||
| 28 | - --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx ` | ||
| 29 | - --num-threads=2 ` | ||
| 30 | - --decoding-method=greedy_search ` | ||
| 31 | - --debug=false ` | ||
| 32 | - ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav | ||
| 33 | - */ | ||
| 34 | - | ||
| 35 | -// paraformer Usage: | ||
| 36 | -/* | ||
| 37 | - .\SherpaOnnx.Examples.exe ` | ||
| 38 | - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` | ||
| 39 | - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` | ||
| 40 | - --num-threads=2 ` | ||
| 41 | - --decoding-method=greedy_search ` | ||
| 42 | - --debug=false ` | ||
| 43 | - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav | ||
| 44 | - */ | ||
| 45 | - | ||
| 46 | -// nemo Usage: | ||
| 47 | -/* | ||
| 48 | - .\SherpaOnnx.Examples.exe ` | ||
| 49 | - --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ` | ||
| 50 | - --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx ` | ||
| 51 | - --num-threads=2 ` | ||
| 52 | - --decoding-method=greedy_search ` | ||
| 53 | - --debug=false ` | ||
| 54 | - ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav | ||
| 55 | - */ | ||
| 56 | - | ||
| 57 | - | ||
| 58 | -internal class OfflineDecodeFiles | ||
| 59 | -{ | ||
| 60 | - static void Main(string[] args) | ||
| 61 | - { | ||
| 62 | - string usage = @" | ||
| 63 | ------------------------------ | ||
| 64 | -transducer Usage: | ||
| 65 | - --tokens=./all_models/sherpa-onnx-conformer-en-2023-03-18/tokens.txt ` | ||
| 66 | - --encoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/encoder-epoch-99-avg-1.onnx ` | ||
| 67 | - --decoder=./all_models/sherpa-onnx-conformer-en-2023-03-18/decoder-epoch-99-avg-1.onnx ` | ||
| 68 | - --joiner=./all_models/sherpa-onnx-conformer-en-2023-03-18/joiner-epoch-99-avg-1.onnx ` | ||
| 69 | - --num-threads=2 ` | ||
| 70 | - --decoding-method=greedy_search ` | ||
| 71 | - --debug=false ` | ||
| 72 | - ./all_models/sherpa-onnx-conformer-en-2023-03-18/test_wavs/0.wav | ||
| 73 | - | ||
| 74 | -paraformer Usage: | ||
| 75 | - --tokens=./all_models/paraformer-onnxruntime-python-example/tokens.txt ` | ||
| 76 | - --paraformer=./all_models/paraformer-onnxruntime-python-example/model.onnx ` | ||
| 77 | - --num-threads=2 ` | ||
| 78 | - --decoding-method=greedy_search ` | ||
| 79 | - --debug=false ` | ||
| 80 | - ./all_models/paraformer-onnxruntime-python-example/test_wavs/0.wav | ||
| 81 | - | ||
| 82 | -nemo Usage: | ||
| 83 | - --tokens=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/tokens.txt ` | ||
| 84 | - --nemo_ctc=./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/model.onnx ` | ||
| 85 | - --num-threads=2 ` | ||
| 86 | - --decoding-method=greedy_search ` | ||
| 87 | - --debug=false ` | ||
| 88 | - ./all_models/sherpa-onnx-nemo-ctc-en-citrinet-512/test_wavs/0.wav | ||
| 89 | ------------------------------ | ||
| 90 | -"; | ||
| 91 | - if (args.Length == 0) | ||
| 92 | - { | ||
| 93 | - System.Console.WriteLine("Please enter the correct parameters:"); | ||
| 94 | - System.Console.WriteLine(usage); | ||
| 95 | - System.Text.StringBuilder sb = new System.Text.StringBuilder(); | ||
| 96 | - //args = Console.ReadLine().Split(" "); | ||
| 97 | - while (true) | ||
| 98 | - { | ||
| 99 | - string input = Console.ReadLine(); | ||
| 100 | - sb.AppendLine(input); | ||
| 101 | - if (Console.ReadKey().Key == ConsoleKey.Enter) | ||
| 102 | - break; | ||
| 103 | - } | ||
| 104 | - args = sb.ToString().Split("\r\n"); | ||
| 105 | - } | ||
| 106 | - Console.WriteLine("Started!\n"); | ||
| 107 | - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; | ||
| 108 | - List<string> wavFiles = new List<string>(); | ||
| 109 | - Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles); | ||
| 110 | - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; | ||
| 111 | - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; | ||
| 112 | - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; | ||
| 113 | - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; | ||
| 114 | - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; | ||
| 115 | - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : ""; | ||
| 116 | - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; | ||
| 117 | - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; | ||
| 118 | - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : ""; | ||
| 119 | - | ||
| 120 | - OfflineTransducer offlineTransducer = new OfflineTransducer(); | ||
| 121 | - offlineTransducer.EncoderFilename = encoder; | ||
| 122 | - offlineTransducer.DecoderFilename = decoder; | ||
| 123 | - offlineTransducer.JoinerFilename = joiner; | ||
| 124 | - | ||
| 125 | - OfflineParaformer offlineParaformer = new OfflineParaformer(); | ||
| 126 | - offlineParaformer.Model = paraformer; | ||
| 127 | - | ||
| 128 | - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); | ||
| 129 | - offlineNemoEncDecCtc.Model = nemo_ctc; | ||
| 130 | - | ||
| 131 | - int numThreads = 0; | ||
| 132 | - int.TryParse(num_threads, out numThreads); | ||
| 133 | - bool isDebug = false; | ||
| 134 | - bool.TryParse(debug, out isDebug); | ||
| 135 | - | ||
| 136 | - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method; | ||
| 137 | - | ||
| 138 | - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) | ||
| 139 | - && string.IsNullOrEmpty(paraformer) | ||
| 140 | - && string.IsNullOrEmpty(nemo_ctc)) | ||
| 141 | - { | ||
| 142 | - Console.WriteLine("Please specify at least one model"); | ||
| 143 | - Console.WriteLine(usage); | ||
| 144 | - } | ||
| 145 | - // batch decode | ||
| 146 | - TimeSpan total_duration = TimeSpan.Zero; | ||
| 147 | - TimeSpan start_time = TimeSpan.Zero; | ||
| 148 | - TimeSpan end_time = TimeSpan.Zero; | ||
| 149 | - List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>(); | ||
| 150 | - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) | ||
| 151 | - { | ||
| 152 | - OfflineRecognizer<OfflineTransducer> offlineRecognizer = new OfflineRecognizer<OfflineTransducer>( | ||
| 153 | - offlineTransducer, | ||
| 154 | - tokens, | ||
| 155 | - num_threads: numThreads, | ||
| 156 | - debug: isDebug, | ||
| 157 | - decoding_method: decodingMethod); | ||
| 158 | - List<float[]> samplesList = new List<float[]>(); | ||
| 159 | - foreach (string wavFile in wavFiles) | ||
| 160 | - { | ||
| 161 | - TimeSpan duration = TimeSpan.Zero; | ||
| 162 | - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); | ||
| 163 | - samplesList.Add(samples); | ||
| 164 | - total_duration += duration; | ||
| 165 | - } | ||
| 166 | - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); | ||
| 167 | - start_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 168 | - offlineRecognizer.DecodeMultipleOfflineStreams(streams); | ||
| 169 | - results = offlineRecognizer.GetResults(streams); | ||
| 170 | - end_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 171 | - } | ||
| 172 | - else if (!string.IsNullOrEmpty(paraformer)) | ||
| 173 | - { | ||
| 174 | - OfflineRecognizer<OfflineParaformer> offlineRecognizer = new OfflineRecognizer<OfflineParaformer>( | ||
| 175 | - offlineParaformer, | ||
| 176 | - tokens, | ||
| 177 | - num_threads: numThreads, | ||
| 178 | - debug: isDebug, | ||
| 179 | - decoding_method: decodingMethod); | ||
| 180 | - List<float[]> samplesList = new List<float[]>(); | ||
| 181 | - foreach (string wavFile in wavFiles) | ||
| 182 | - { | ||
| 183 | - TimeSpan duration = TimeSpan.Zero; | ||
| 184 | - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); | ||
| 185 | - samplesList.Add(samples); | ||
| 186 | - total_duration += duration; | ||
| 187 | - } | ||
| 188 | - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); | ||
| 189 | - start_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 190 | - offlineRecognizer.DecodeMultipleOfflineStreams(streams); | ||
| 191 | - results = offlineRecognizer.GetResults(streams); | ||
| 192 | - end_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 193 | - } | ||
| 194 | - else if (!string.IsNullOrEmpty(nemo_ctc)) | ||
| 195 | - { | ||
| 196 | - OfflineRecognizer<OfflineNemoEncDecCtc> offlineRecognizer = new OfflineRecognizer<OfflineNemoEncDecCtc>( | ||
| 197 | - offlineNemoEncDecCtc, | ||
| 198 | - tokens, | ||
| 199 | - num_threads: numThreads, | ||
| 200 | - debug: isDebug, | ||
| 201 | - decoding_method: decodingMethod); | ||
| 202 | - List<float[]> samplesList = new List<float[]>(); | ||
| 203 | - foreach (string wavFile in wavFiles) | ||
| 204 | - { | ||
| 205 | - TimeSpan duration = TimeSpan.Zero; | ||
| 206 | - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); | ||
| 207 | - samplesList.Add(samples); | ||
| 208 | - total_duration += duration; | ||
| 209 | - } | ||
| 210 | - OfflineStream[] streams = offlineRecognizer.CreateOfflineStream(samplesList); | ||
| 211 | - start_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 212 | - offlineRecognizer.DecodeMultipleOfflineStreams(streams); | ||
| 213 | - results = offlineRecognizer.GetResults(streams); | ||
| 214 | - end_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 215 | - } | ||
| 216 | - | ||
| 217 | - foreach (var item in results.Zip<OfflineRecognizerResultEntity, string>(wavFiles)) | ||
| 218 | - { | ||
| 219 | - Console.WriteLine("wavFile:{0}", item.Second); | ||
| 220 | - Console.WriteLine("text:{0}", item.First.text.ToLower()); | ||
| 221 | - Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString()); | ||
| 222 | - } | ||
| 223 | - | ||
| 224 | - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; | ||
| 225 | - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; | ||
| 226 | - Console.WriteLine("num_threads:{0}", num_threads); | ||
| 227 | - Console.WriteLine("decoding_method:{0}", decodingMethod); | ||
| 228 | - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); | ||
| 229 | - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); | ||
| 230 | - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); | ||
| 231 | - | ||
| 232 | - Console.WriteLine("End!"); | ||
| 233 | - } | ||
| 234 | - | ||
| 235 | - static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles) | ||
| 236 | - { | ||
| 237 | - Dictionary<string, string> argsDict = new Dictionary<string, string>(); | ||
| 238 | - foreach (string input in args) | ||
| 239 | - { | ||
| 240 | - string[] ss = input.Split("="); | ||
| 241 | - if (ss.Length == 1) | ||
| 242 | - { | ||
| 243 | - if (!string.IsNullOrEmpty(ss[0])) | ||
| 244 | - { | ||
| 245 | - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); | ||
| 246 | - } | ||
| 247 | - } | ||
| 248 | - else | ||
| 249 | - { | ||
| 250 | - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); | ||
| 251 | - } | ||
| 252 | - } | ||
| 253 | - return argsDict; | ||
| 254 | - } | ||
| 255 | -} |
csharp-api-examples/OnlineDecodeFile.cs
已删除
100644 → 0
| 1 | -// See https://aka.ms/new-console-template for more information | ||
| 2 | -// Copyright (c) 2023 by manyeyes | ||
| 3 | -using SherpaOnnx; | ||
| 4 | -/// Please refer to | ||
| 5 | -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 6 | -/// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 7 | -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 8 | -/// from there. | ||
| 9 | - | ||
| 10 | -/// download model eg: | ||
| 11 | -/// (The directory where the application runs) | ||
| 12 | -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory | ||
| 13 | -/// cd /path/to | ||
| 14 | -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 15 | - | ||
| 16 | -/// NuGet for sherpa-onnx | ||
| 17 | -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx | ||
| 18 | -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx | ||
| 19 | - | ||
| 20 | -// transducer Usage: | ||
| 21 | -/* | ||
| 22 | - .\SherpaOnnx.Examples.exe ` | ||
| 23 | - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` | ||
| 24 | - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` | ||
| 25 | - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` | ||
| 26 | - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` | ||
| 27 | - --num-threads=2 ` | ||
| 28 | - --decoding-method=modified_beam_search ` | ||
| 29 | - --debug=false ` | ||
| 30 | - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav | ||
| 31 | - */ | ||
| 32 | - | ||
| 33 | -internal class OnlineDecodeFile | ||
| 34 | -{ | ||
| 35 | - static void Main(string[] args) | ||
| 36 | - { | ||
| 37 | - string usage = @" | ||
| 38 | ------------------------------ | ||
| 39 | -transducer Usage: | ||
| 40 | - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` | ||
| 41 | - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` | ||
| 42 | - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` | ||
| 43 | - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` | ||
| 44 | - --num-threads=2 ` | ||
| 45 | - --decoding-method=modified_beam_search ` | ||
| 46 | - --debug=false ` | ||
| 47 | - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav | ||
| 48 | ------------------------------ | ||
| 49 | -"; | ||
| 50 | - if (args.Length == 0) | ||
| 51 | - { | ||
| 52 | - System.Console.WriteLine("Please enter the correct parameters:"); | ||
| 53 | - System.Console.WriteLine(usage); | ||
| 54 | - System.Text.StringBuilder sb = new System.Text.StringBuilder(); | ||
| 55 | - //args = Console.ReadLine().Split(" "); | ||
| 56 | - while (true) | ||
| 57 | - { | ||
| 58 | - string input = Console.ReadLine(); | ||
| 59 | - sb.AppendLine(input); | ||
| 60 | - if (Console.ReadKey().Key == ConsoleKey.Enter) | ||
| 61 | - break; | ||
| 62 | - } | ||
| 63 | - args = sb.ToString().Split("\r\n"); | ||
| 64 | - } | ||
| 65 | - Console.WriteLine("Started!\n"); | ||
| 66 | - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; | ||
| 67 | - List<string> wavFiles = new List<string>(); | ||
| 68 | - Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles); | ||
| 69 | - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; | ||
| 70 | - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; | ||
| 71 | - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; | ||
| 72 | - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; | ||
| 73 | - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; | ||
| 74 | - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : ""; | ||
| 75 | - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; | ||
| 76 | - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; | ||
| 77 | - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : ""; | ||
| 78 | - | ||
| 79 | - OfflineTransducer offlineTransducer = new OfflineTransducer(); | ||
| 80 | - offlineTransducer.EncoderFilename = encoder; | ||
| 81 | - offlineTransducer.DecoderFilename = decoder; | ||
| 82 | - offlineTransducer.JoinerFilename = joiner; | ||
| 83 | - | ||
| 84 | - OfflineParaformer offlineParaformer = new OfflineParaformer(); | ||
| 85 | - offlineParaformer.Model = paraformer; | ||
| 86 | - | ||
| 87 | - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); | ||
| 88 | - offlineNemoEncDecCtc.Model = nemo_ctc; | ||
| 89 | - | ||
| 90 | - int numThreads = 0; | ||
| 91 | - int.TryParse(num_threads, out numThreads); | ||
| 92 | - bool isDebug = false; | ||
| 93 | - bool.TryParse(debug, out isDebug); | ||
| 94 | - | ||
| 95 | - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method; | ||
| 96 | - | ||
| 97 | - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) | ||
| 98 | - && string.IsNullOrEmpty(paraformer) | ||
| 99 | - && string.IsNullOrEmpty(nemo_ctc)) | ||
| 100 | - { | ||
| 101 | - Console.WriteLine("Please specify at least one model"); | ||
| 102 | - Console.WriteLine(usage); | ||
| 103 | - } | ||
| 104 | - // batch decode | ||
| 105 | - TimeSpan total_duration = TimeSpan.Zero; | ||
| 106 | - TimeSpan start_time = TimeSpan.Zero; | ||
| 107 | - TimeSpan end_time = TimeSpan.Zero; | ||
| 108 | - List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>(); | ||
| 109 | - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) | ||
| 110 | - { | ||
| 111 | - OnlineTransducer onlineTransducer = new OnlineTransducer(); | ||
| 112 | - onlineTransducer.EncoderFilename = encoder; | ||
| 113 | - onlineTransducer.DecoderFilename = decoder; | ||
| 114 | - onlineTransducer.JoinerFilename = joiner; | ||
| 115 | - //test online | ||
| 116 | - OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>( | ||
| 117 | - onlineTransducer, | ||
| 118 | - tokens, | ||
| 119 | - num_threads: numThreads, | ||
| 120 | - debug: isDebug, | ||
| 121 | - decoding_method: decodingMethod); | ||
| 122 | - foreach (string wavFile in wavFiles) | ||
| 123 | - { | ||
| 124 | - TimeSpan duration = TimeSpan.Zero; | ||
| 125 | - List<float[]> samplesList = AudioHelper.GetChunkSamplesList(wavFile, ref duration); | ||
| 126 | - OnlineStream stream = onlineRecognizer.CreateStream(); | ||
| 127 | - start_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 128 | - for (int i = 0; i < samplesList.Count; i++) | ||
| 129 | - { | ||
| 130 | - onlineRecognizer.AcceptWaveForm(stream, 16000, samplesList[i]); | ||
| 131 | - onlineRecognizer.DecodeStream(stream); | ||
| 132 | - OnlineRecognizerResultEntity result_on = onlineRecognizer.GetResult(stream); | ||
| 133 | - Console.WriteLine(result_on.text); | ||
| 134 | - } | ||
| 135 | - total_duration += duration; | ||
| 136 | - } | ||
| 137 | - end_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 138 | - } | ||
| 139 | - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; | ||
| 140 | - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; | ||
| 141 | - Console.WriteLine("num_threads:{0}", num_threads); | ||
| 142 | - Console.WriteLine("decoding_method:{0}", decodingMethod); | ||
| 143 | - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); | ||
| 144 | - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); | ||
| 145 | - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); | ||
| 146 | - | ||
| 147 | - Console.WriteLine("End!"); | ||
| 148 | - } | ||
| 149 | - | ||
| 150 | - static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles) | ||
| 151 | - { | ||
| 152 | - Dictionary<string, string> argsDict = new Dictionary<string, string>(); | ||
| 153 | - foreach (string input in args) | ||
| 154 | - { | ||
| 155 | - string[] ss = input.Split("="); | ||
| 156 | - if (ss.Length == 1) | ||
| 157 | - { | ||
| 158 | - if (!string.IsNullOrEmpty(ss[0])) | ||
| 159 | - { | ||
| 160 | - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); | ||
| 161 | - } | ||
| 162 | - } | ||
| 163 | - else | ||
| 164 | - { | ||
| 165 | - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); | ||
| 166 | - } | ||
| 167 | - } | ||
| 168 | - return argsDict; | ||
| 169 | - } | ||
| 170 | - | ||
| 171 | -} |
csharp-api-examples/OnlineDecodeFiles.cs
已删除
100644 → 0
| 1 | -// See https://aka.ms/new-console-template for more information | ||
| 2 | -// Copyright (c) 2023 by manyeyes | ||
| 3 | -using SherpaOnnx; | ||
| 4 | -/// Please refer to | ||
| 5 | -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 6 | -/// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 7 | -/// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 8 | -/// from there. | ||
| 9 | - | ||
| 10 | -/// download model eg: | ||
| 11 | -/// (The directory where the application runs) | ||
| 12 | -/// [/path/to]=System.AppDomain.CurrentDomain.BaseDirectory | ||
| 13 | -/// cd /path/to | ||
| 14 | -/// git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 15 | - | ||
| 16 | -/// NuGet for sherpa-onnx | ||
| 17 | -/// PM > Install-Package NAudio -version 2.1.0 -Project sherpa-onnx | ||
| 18 | -/// PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx | ||
| 19 | - | ||
| 20 | -// transducer Usage: | ||
| 21 | -/* | ||
| 22 | - .\SherpaOnnx.Examples.exe ` | ||
| 23 | - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` | ||
| 24 | - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` | ||
| 25 | - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` | ||
| 26 | - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` | ||
| 27 | - --num-threads=2 ` | ||
| 28 | - --decoding-method=modified_beam_search ` | ||
| 29 | - --debug=false ` | ||
| 30 | - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ` | ||
| 31 | - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav | ||
| 32 | - */ | ||
| 33 | - | ||
| 34 | -internal class OnlineDecodeFiles | ||
| 35 | -{ | ||
| 36 | - static void Main(string[] args) | ||
| 37 | - { | ||
| 38 | - string usage = @" | ||
| 39 | ------------------------------ | ||
| 40 | -transducer Usage: | ||
| 41 | - --tokens=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt ` | ||
| 42 | - --encoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx ` | ||
| 43 | - --decoder=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx ` | ||
| 44 | - --joiner=./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx ` | ||
| 45 | - --num-threads=2 ` | ||
| 46 | - --decoding-method=modified_beam_search ` | ||
| 47 | - --debug=false ` | ||
| 48 | - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav ` | ||
| 49 | - ./all_models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav | ||
| 50 | ------------------------------ | ||
| 51 | -"; | ||
| 52 | - if (args.Length == 0) | ||
| 53 | - { | ||
| 54 | - System.Console.WriteLine("Please enter the correct parameters:"); | ||
| 55 | - System.Console.WriteLine(usage); | ||
| 56 | - System.Text.StringBuilder sb = new System.Text.StringBuilder(); | ||
| 57 | - //args = Console.ReadLine().Split(" "); | ||
| 58 | - while (true) | ||
| 59 | - { | ||
| 60 | - string input = Console.ReadLine(); | ||
| 61 | - sb.AppendLine(input); | ||
| 62 | - if (Console.ReadKey().Key == ConsoleKey.Enter) | ||
| 63 | - break; | ||
| 64 | - } | ||
| 65 | - args = sb.ToString().Split("\r\n"); | ||
| 66 | - } | ||
| 67 | - Console.WriteLine("Started!\n"); | ||
| 68 | - string? applicationBase = System.AppDomain.CurrentDomain.BaseDirectory; | ||
| 69 | - List<string> wavFiles = new List<string>(); | ||
| 70 | - Dictionary<string, string> argsDict = GetDict(args, applicationBase, ref wavFiles); | ||
| 71 | - string decoder = argsDict.ContainsKey("decoder") ? Path.Combine(applicationBase, argsDict["decoder"]) : ""; | ||
| 72 | - string encoder = argsDict.ContainsKey("encoder") ? Path.Combine(applicationBase, argsDict["encoder"]) : ""; | ||
| 73 | - string joiner = argsDict.ContainsKey("joiner") ? Path.Combine(applicationBase, argsDict["joiner"]) : ""; | ||
| 74 | - string paraformer = argsDict.ContainsKey("paraformer") ? Path.Combine(applicationBase, argsDict["paraformer"]) : ""; | ||
| 75 | - string nemo_ctc = argsDict.ContainsKey("nemo_ctc") ? Path.Combine(applicationBase, argsDict["nemo_ctc"]) : ""; | ||
| 76 | - string tokens = argsDict.ContainsKey("tokens") ? Path.Combine(applicationBase, argsDict["tokens"]) : ""; | ||
| 77 | - string num_threads = argsDict.ContainsKey("num_threads") ? argsDict["num_threads"] : ""; | ||
| 78 | - string decoding_method = argsDict.ContainsKey("decoding_method") ? argsDict["decoding_method"] : ""; | ||
| 79 | - string debug = argsDict.ContainsKey("debug") ? argsDict["debug"] : ""; | ||
| 80 | - | ||
| 81 | - OfflineTransducer offlineTransducer = new OfflineTransducer(); | ||
| 82 | - offlineTransducer.EncoderFilename = encoder; | ||
| 83 | - offlineTransducer.DecoderFilename = decoder; | ||
| 84 | - offlineTransducer.JoinerFilename = joiner; | ||
| 85 | - | ||
| 86 | - OfflineParaformer offlineParaformer = new OfflineParaformer(); | ||
| 87 | - offlineParaformer.Model = paraformer; | ||
| 88 | - | ||
| 89 | - OfflineNemoEncDecCtc offlineNemoEncDecCtc = new OfflineNemoEncDecCtc(); | ||
| 90 | - offlineNemoEncDecCtc.Model = nemo_ctc; | ||
| 91 | - | ||
| 92 | - int numThreads = 0; | ||
| 93 | - int.TryParse(num_threads, out numThreads); | ||
| 94 | - bool isDebug = false; | ||
| 95 | - bool.TryParse(debug, out isDebug); | ||
| 96 | - | ||
| 97 | - string decodingMethod = string.IsNullOrEmpty(decoding_method) ? "" : decoding_method; | ||
| 98 | - | ||
| 99 | - if ((string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner)) | ||
| 100 | - && string.IsNullOrEmpty(paraformer) | ||
| 101 | - && string.IsNullOrEmpty(nemo_ctc)) | ||
| 102 | - { | ||
| 103 | - Console.WriteLine("Please specify at least one model"); | ||
| 104 | - Console.WriteLine(usage); | ||
| 105 | - } | ||
| 106 | - // batch decode | ||
| 107 | - TimeSpan total_duration = TimeSpan.Zero; | ||
| 108 | - TimeSpan start_time = TimeSpan.Zero; | ||
| 109 | - TimeSpan end_time = TimeSpan.Zero; | ||
| 110 | - List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>(); | ||
| 111 | - if (!(string.IsNullOrEmpty(encoder) || string.IsNullOrEmpty(decoder) || string.IsNullOrEmpty(joiner))) | ||
| 112 | - { | ||
| 113 | - OnlineTransducer onlineTransducer = new OnlineTransducer(); | ||
| 114 | - onlineTransducer.EncoderFilename = encoder; | ||
| 115 | - onlineTransducer.DecoderFilename = decoder; | ||
| 116 | - onlineTransducer.JoinerFilename = joiner; | ||
| 117 | - //test online | ||
| 118 | - OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>( | ||
| 119 | - onlineTransducer, | ||
| 120 | - tokens, | ||
| 121 | - num_threads: numThreads, | ||
| 122 | - debug: isDebug, | ||
| 123 | - decoding_method: decodingMethod); | ||
| 124 | - List<float[]> samplesList = new List<float[]>(); | ||
| 125 | - foreach (string wavFile in wavFiles) | ||
| 126 | - { | ||
| 127 | - TimeSpan duration = TimeSpan.Zero; | ||
| 128 | - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); | ||
| 129 | - samplesList.Add(samples); | ||
| 130 | - total_duration += duration; | ||
| 131 | - } | ||
| 132 | - start_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 133 | - List<OnlineStream> streams = new List<OnlineStream>(); | ||
| 134 | - foreach (float[] samples in samplesList) | ||
| 135 | - { | ||
| 136 | - OnlineStream stream = onlineRecognizer.CreateStream(); | ||
| 137 | - onlineRecognizer.AcceptWaveForm(stream, 16000, samples); | ||
| 138 | - streams.Add(stream); | ||
| 139 | - onlineRecognizer.InputFinished(stream); | ||
| 140 | - } | ||
| 141 | - onlineRecognizer.DecodeMultipleStreams(streams); | ||
| 142 | - results = onlineRecognizer.GetResults(streams); | ||
| 143 | - foreach (OnlineRecognizerResultEntity result in results) | ||
| 144 | - { | ||
| 145 | - Console.WriteLine(result.text); | ||
| 146 | - } | ||
| 147 | - end_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 148 | - } | ||
| 149 | - | ||
| 150 | - | ||
| 151 | - foreach (var item in results.Zip<OnlineRecognizerResultEntity, string>(wavFiles)) | ||
| 152 | - { | ||
| 153 | - Console.WriteLine("wavFile:{0}", item.Second); | ||
| 154 | - Console.WriteLine("text:{0}", item.First.text.ToLower()); | ||
| 155 | - Console.WriteLine("text_len:{0}\n", item.First.text_len.ToString()); | ||
| 156 | - } | ||
| 157 | - | ||
| 158 | - double elapsed_milliseconds = end_time.TotalMilliseconds - start_time.TotalMilliseconds; | ||
| 159 | - double rtf = elapsed_milliseconds / total_duration.TotalMilliseconds; | ||
| 160 | - Console.WriteLine("num_threads:{0}", num_threads); | ||
| 161 | - Console.WriteLine("decoding_method:{0}", decodingMethod); | ||
| 162 | - Console.WriteLine("elapsed_milliseconds:{0}", elapsed_milliseconds.ToString()); | ||
| 163 | - Console.WriteLine("wave total_duration_milliseconds:{0}", total_duration.TotalMilliseconds.ToString()); | ||
| 164 | - Console.WriteLine("Real time factor (RTF):{0}", rtf.ToString()); | ||
| 165 | - | ||
| 166 | - Console.WriteLine("End!"); | ||
| 167 | - } | ||
| 168 | - | ||
| 169 | - public void AnotherWayOfDecodeFiles(string encoder, string decoder, string joiner, string tokens, int numThreads, bool isDebug, string decodingMethod, List<string> wavFiles, ref TimeSpan total_duration) | ||
| 170 | - { | ||
| 171 | - OnlineTransducer onlineTransducer = new OnlineTransducer(); | ||
| 172 | - onlineTransducer.EncoderFilename = encoder; | ||
| 173 | - onlineTransducer.DecoderFilename = decoder; | ||
| 174 | - onlineTransducer.JoinerFilename = joiner; | ||
| 175 | - //test online | ||
| 176 | - OnlineRecognizer<OnlineTransducer> onlineRecognizer = new OnlineRecognizer<OnlineTransducer>( | ||
| 177 | - onlineTransducer, | ||
| 178 | - tokens, | ||
| 179 | - num_threads: numThreads, | ||
| 180 | - debug: isDebug, | ||
| 181 | - decoding_method: decodingMethod); | ||
| 182 | - List<float[]> samplesList = new List<float[]>(); | ||
| 183 | - foreach (string wavFile in wavFiles) | ||
| 184 | - { | ||
| 185 | - TimeSpan duration = TimeSpan.Zero; | ||
| 186 | - float[] samples = AudioHelper.GetFileSamples(wavFile, ref duration); | ||
| 187 | - samplesList.Add(samples); | ||
| 188 | - total_duration += duration; | ||
| 189 | - } | ||
| 190 | - TimeSpan start_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 191 | - List<OnlineStream> streams = onlineRecognizer.CreateStreams(samplesList); | ||
| 192 | - onlineRecognizer.DecodeMultipleStreams(streams); | ||
| 193 | - List<OnlineRecognizerResultEntity> results = onlineRecognizer.GetResults(streams); | ||
| 194 | - foreach (OnlineRecognizerResultEntity result in results) | ||
| 195 | - { | ||
| 196 | - Console.WriteLine(result.text); | ||
| 197 | - } | ||
| 198 | - TimeSpan end_time = new TimeSpan(DateTime.Now.Ticks); | ||
| 199 | - } | ||
| 200 | - | ||
| 201 | - static Dictionary<string, string> GetDict(string[] args, string applicationBase, ref List<string> wavFiles) | ||
| 202 | - { | ||
| 203 | - Dictionary<string, string> argsDict = new Dictionary<string, string>(); | ||
| 204 | - foreach (string input in args) | ||
| 205 | - { | ||
| 206 | - string[] ss = input.Split("="); | ||
| 207 | - if (ss.Length == 1) | ||
| 208 | - { | ||
| 209 | - if (!string.IsNullOrEmpty(ss[0])) | ||
| 210 | - { | ||
| 211 | - wavFiles.Add(Path.Combine(applicationBase, ss[0].Trim(new char[] { '-', '`', ' ' }))); | ||
| 212 | - } | ||
| 213 | - } | ||
| 214 | - else | ||
| 215 | - { | ||
| 216 | - argsDict.Add(ss[0].Trim(new char[] { '-', '`', ' ' }).Replace("-", "_"), ss[1].Trim(new char[] { '-', '`', ' ' })); | ||
| 217 | - } | ||
| 218 | - } | ||
| 219 | - return argsDict; | ||
| 220 | - } | ||
| 221 | -} |
csharp-api-examples/README.md
已删除
100644 → 0
| 1 | -#ProjectReference csharp-api | ||
| 2 | -`<ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" />` | ||
| 3 | -The location of the 'SherpaOnnx' file is ../sherpa-onnx/csharp-api. | ||
| 4 | -This C # API is cross platform and you can compile it yourself in Windows, Mac OS, and Linux environments. | ||
| 5 | - | ||
| 6 | ------------- | ||
| 7 | -Alternatively, install sherpaonnx through nuget. | ||
| 8 | -#NuGet for sherpa-onnx | ||
| 9 | -PM > Install-Package SherpaOnnxCsharp -Project sherpa-onnx |
csharp-api-examples/Utils/AudioHelper.cs
已删除
100644 → 0
| 1 | -using NAudio.Wave; | ||
| 2 | -using System; | ||
| 3 | -using System.Collections.Generic; | ||
| 4 | -using System.Diagnostics; | ||
| 5 | -using System.Linq; | ||
| 6 | -using System.Text; | ||
| 7 | -using System.Threading.Tasks; | ||
| 8 | - | ||
| 9 | -/// <summary> | ||
| 10 | -/// audio processing | ||
| 11 | -/// Copyright (c) 2023 by manyeyes | ||
| 12 | -/// </summary> | ||
| 13 | -public class AudioHelper | ||
| 14 | -{ | ||
| 15 | - public static float[] GetFileSamples(string wavFilePath, ref TimeSpan duration) | ||
| 16 | - { | ||
| 17 | - if (!File.Exists(wavFilePath)) | ||
| 18 | - { | ||
| 19 | - Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath); | ||
| 20 | - return new float[1]; | ||
| 21 | - } | ||
| 22 | - AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath); | ||
| 23 | - byte[] datas = new byte[_audioFileReader.Length]; | ||
| 24 | - _audioFileReader.Read(datas, 0, datas.Length); | ||
| 25 | - duration = _audioFileReader.TotalTime; | ||
| 26 | - float[] wavdata = new float[datas.Length / sizeof(float)]; | ||
| 27 | - Buffer.BlockCopy(datas, 0, wavdata, 0, datas.Length); | ||
| 28 | - return wavdata; | ||
| 29 | - } | ||
| 30 | - | ||
| 31 | - public static List<float[]> GetChunkSamplesList(string wavFilePath, ref TimeSpan duration) | ||
| 32 | - { | ||
| 33 | - List<float[]> wavdatas = new List<float[]>(); | ||
| 34 | - if (!File.Exists(wavFilePath)) | ||
| 35 | - { | ||
| 36 | - Trace.Assert(File.Exists(wavFilePath), "file does not exist:" + wavFilePath); | ||
| 37 | - wavdatas.Add(new float[1]); | ||
| 38 | - return wavdatas; | ||
| 39 | - } | ||
| 40 | - AudioFileReader _audioFileReader = new AudioFileReader(wavFilePath); | ||
| 41 | - byte[] datas = new byte[_audioFileReader.Length]; | ||
| 42 | - int chunkSize = 16000;// datas.Length / sizeof(float); | ||
| 43 | - int chunkNum = (int)Math.Ceiling((double)datas.Length / chunkSize); | ||
| 44 | - for (int i = 0; i < chunkNum; i++) | ||
| 45 | - { | ||
| 46 | - int offset = 0; | ||
| 47 | - int dataCount = 0; | ||
| 48 | - if (Math.Abs(datas.Length - i * chunkSize) > chunkSize) | ||
| 49 | - { | ||
| 50 | - offset = i * chunkSize; | ||
| 51 | - dataCount = chunkSize; | ||
| 52 | - } | ||
| 53 | - else | ||
| 54 | - { | ||
| 55 | - offset = i * chunkSize; | ||
| 56 | - dataCount = datas.Length - i * chunkSize; | ||
| 57 | - } | ||
| 58 | - _audioFileReader.Read(datas, offset, dataCount); | ||
| 59 | - duration += _audioFileReader.TotalTime; | ||
| 60 | - float[] wavdata = new float[chunkSize / sizeof(float)]; | ||
| 61 | - Buffer.BlockCopy(datas, offset, wavdata, 0, dataCount); | ||
| 62 | - wavdatas.Add(wavdata); | ||
| 63 | - | ||
| 64 | - } | ||
| 65 | - return wavdatas; | ||
| 66 | - } | ||
| 67 | -} |
dotnet-examples/.editorconfig
0 → 100644
dotnet-examples/.gitignore
0 → 100644
| 1 | +// Copyright (c) 2023 Xiaomi Corporation | ||
| 2 | +// Copyright (c) 2023 by manyeyes | ||
| 3 | +// | ||
| 4 | +// This file shows how to use a non-streaming model to decode files | ||
| 5 | +// Please refer to | ||
| 6 | +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 7 | +// to download non-streaming models | ||
| 8 | +using CommandLine.Text; | ||
| 9 | +using CommandLine; | ||
| 10 | +using SherpaOnnx; | ||
| 11 | +using System.Collections.Generic; | ||
| 12 | +using System; | ||
| 13 | + | ||
| 14 | +class OfflineDecodeFiles | ||
| 15 | +{ | ||
| 16 | + class Options | ||
| 17 | + { | ||
| 18 | + [Option(Required = false, HelpText = "Path to tokens.txt")] | ||
| 19 | + public string Tokens { get; set; } | ||
| 20 | + | ||
| 21 | + [Option(Required = false, HelpText = "Path to encoder.onnx. Used only for transducer models")] | ||
| 22 | + public string Encoder { get; set; } | ||
| 23 | + | ||
| 24 | + [Option(Required = false, HelpText = "Path to decoder.onnx. Used only for transducer models")] | ||
| 25 | + public string Decoder { get; set; } | ||
| 26 | + | ||
| 27 | + [Option(Required = false, HelpText = "Path to joiner.onnx. Used only for transducer models")] | ||
| 28 | + public string Joiner { get; set; } | ||
| 29 | + | ||
| 30 | + [Option(Required = false, HelpText = "Path to model.onnx. Used only for paraformer models")] | ||
| 31 | + public string Paraformer { get; set; } | ||
| 32 | + | ||
| 33 | + [Option("nemo-ctc", Required = false, HelpText = "Path to model.onnx. Used only for NeMo CTC models")] | ||
| 34 | + public string NeMoCtc { get; set; } | ||
| 35 | + | ||
| 36 | + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] | ||
| 37 | + public int NumThreads { get; set; } | ||
| 38 | + | ||
| 39 | + [Option("decoding-method", Required = false, Default = "greedy_search", | ||
| 40 | + HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] | ||
| 41 | + public string DecodingMethod { get; set; } | ||
| 42 | + | ||
| 43 | + [Option("max-active-paths", Required = false, Default = 4, | ||
| 44 | + HelpText = @"Used only when --decoding-method is modified_beam_search. | ||
| 45 | +It specifies the number of active paths to keep during the search")] | ||
| 46 | + public int MaxActivePaths { get; set; } | ||
| 47 | + | ||
| 48 | + [Option("files", Required = true, HelpText = "Audio files for decoding")] | ||
| 49 | + public IEnumerable<string> Files { get; set; } | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + static void Main(string[] args) | ||
| 53 | + { | ||
| 54 | + var parser = new CommandLine.Parser(with => with.HelpWriter = null); | ||
| 55 | + var parserResult = parser.ParseArguments<Options>(args); | ||
| 56 | + | ||
| 57 | + parserResult | ||
| 58 | + .WithParsed<Options>(options => Run(options)) | ||
| 59 | + .WithNotParsed(errs => DisplayHelp(parserResult, errs)); | ||
| 60 | + } | ||
| 61 | + | ||
| 62 | + private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) | ||
| 63 | + { | ||
| 64 | + string usage = @" | ||
| 65 | +# Zipformer | ||
| 66 | + | ||
| 67 | +dotnet run \ | ||
| 68 | + --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \ | ||
| 69 | + --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \ | ||
| 70 | + --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \ | ||
| 71 | + --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \ | ||
| 72 | + --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \ | ||
| 73 | + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \ | ||
| 74 | + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav | ||
| 75 | + | ||
| 76 | +Please refer to | ||
| 77 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/index.html | ||
| 78 | +to download pre-trained non-streaming zipformer models. | ||
| 79 | + | ||
| 80 | +# Paraformer | ||
| 81 | + | ||
| 82 | +dotnet run \ | ||
| 83 | + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ | ||
| 84 | + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \ | ||
| 85 | + --files \ | ||
| 86 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \ | ||
| 87 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \ | ||
| 88 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \ | ||
| 89 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav | ||
| 90 | + | ||
| 91 | +Please refer to | ||
| 92 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-paraformer/index.html | ||
| 93 | +to download pre-trained paraformer models | ||
| 94 | + | ||
| 95 | +# NeMo CTC | ||
| 96 | + | ||
| 97 | +dotnet run \ | ||
| 98 | + --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \ | ||
| 99 | + --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ | ||
| 100 | + --num-threads=1 \ | ||
| 101 | + --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \ | ||
| 102 | + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \ | ||
| 103 | + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav | ||
| 104 | + | ||
| 105 | +Please refer to | ||
| 106 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/index.html | ||
| 107 | +to download pre-trained NeMo CTC models | ||
| 108 | +"; | ||
| 109 | + | ||
| 110 | + var helpText = HelpText.AutoBuild(result, h => | ||
| 111 | + { | ||
| 112 | + h.AdditionalNewLineAfterOption = false; | ||
| 113 | + h.Heading = usage; | ||
| 114 | + h.Copyright = "Copyright (c) 2023 Xiaomi Corporation"; | ||
| 115 | + return HelpText.DefaultParsingErrorsHandler(result, h); | ||
| 116 | + }, e => e); | ||
| 117 | + Console.WriteLine(helpText); | ||
| 118 | + } | ||
| 119 | + | ||
| 120 | + private static void Run(Options options) | ||
| 121 | + { | ||
| 122 | + OfflineRecognizerConfig config = new OfflineRecognizerConfig(); | ||
| 123 | + config.ModelConfig.Tokens = options.Tokens; | ||
| 124 | + | ||
| 125 | + if (!String.IsNullOrEmpty(options.Encoder)) | ||
| 126 | + { | ||
| 127 | + // this is a transducer model | ||
| 128 | + config.ModelConfig.Transducer.Encoder = options.Encoder; | ||
| 129 | + config.ModelConfig.Transducer.Decoder = options.Decoder; | ||
| 130 | + config.ModelConfig.Transducer.Joiner = options.Joiner; | ||
| 131 | + } | ||
| 132 | + else if (!String.IsNullOrEmpty(options.Paraformer)) | ||
| 133 | + { | ||
| 134 | + config.ModelConfig.Paraformer.Model = options.Paraformer; | ||
| 135 | + } | ||
| 136 | + else if (!String.IsNullOrEmpty(options.NeMoCtc)) | ||
| 137 | + { | ||
| 138 | + config.ModelConfig.NeMoCtc.Model = options.NeMoCtc; | ||
| 139 | + } | ||
| 140 | + else | ||
| 141 | + { | ||
| 142 | + Console.WriteLine("Please provide a model"); | ||
| 143 | + return; | ||
| 144 | + } | ||
| 145 | + | ||
| 146 | + config.DecodingMethod = options.DecodingMethod; | ||
| 147 | + config.MaxActivePaths = options.MaxActivePaths; | ||
| 148 | + config.ModelConfig.Debug = 0; | ||
| 149 | + | ||
| 150 | + OfflineRecognizer recognizer = new OfflineRecognizer(config); | ||
| 151 | + | ||
| 152 | + string[] files = options.Files.ToArray(); | ||
| 153 | + | ||
| 154 | + // We create a separate stream for each file | ||
| 155 | + List<OfflineStream> streams = new List<OfflineStream>(); | ||
| 156 | + streams.EnsureCapacity(files.Length); | ||
| 157 | + | ||
| 158 | + for (int i = 0; i != files.Length; ++i) | ||
| 159 | + { | ||
| 160 | + OfflineStream s = recognizer.CreateStream(); | ||
| 161 | + | ||
| 162 | + WaveReader waveReader = new WaveReader(files[i]); | ||
| 163 | + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | ||
| 164 | + streams.Add(s); | ||
| 165 | + } | ||
| 166 | + | ||
| 167 | + recognizer.Decode(streams); | ||
| 168 | + | ||
| 169 | + // display results | ||
| 170 | + for (int i = 0; i != files.Length; ++i) | ||
| 171 | + { | ||
| 172 | + var text = streams[i].Result.Text; | ||
| 173 | + Console.WriteLine("--------------------"); | ||
| 174 | + Console.WriteLine(files[i]); | ||
| 175 | + Console.WriteLine(text); | ||
| 176 | + } | ||
| 177 | + Console.WriteLine("--------------------"); | ||
| 178 | + } | ||
| 179 | +} |
| 1 | +../online-decode-files/WaveReader.cs |
| 1 | -<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | - | ||
| 3 | - <PropertyGroup> | ||
| 4 | - <OutputType>Exe</OutputType> | ||
| 5 | - <TargetFramework>net6.0</TargetFramework> | ||
| 6 | - <RootNamespace>sherpa_onnx</RootNamespace> | ||
| 7 | - <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | - <Nullable>enable</Nullable> | ||
| 9 | - <StartupObject>OnlineDecodeFiles</StartupObject> | ||
| 10 | - </PropertyGroup> | ||
| 11 | - | ||
| 12 | - <ItemGroup> | ||
| 13 | - <PackageReference Include="NAudio" Version="2.1.0" /> | ||
| 14 | - </ItemGroup> | ||
| 15 | - | ||
| 16 | - <ItemGroup> | ||
| 17 | - <ProjectReference Include="..\SherpaOnnx\SherpaOnnx.csproj" /> | ||
| 18 | - </ItemGroup> | ||
| 19 | - | ||
| 20 | -</Project> | 1 | +<Project Sdk="Microsoft.NET.Sdk"> |
| 2 | + | ||
| 3 | + <PropertyGroup> | ||
| 4 | + <OutputType>Exe</OutputType> | ||
| 5 | + <TargetFramework>net6.0</TargetFramework> | ||
| 6 | + <RootNamespace>offline_decode_files</RootNamespace> | ||
| 7 | + <ImplicitUsings>enable</ImplicitUsings> | ||
| 8 | + <Nullable>enable</Nullable> | ||
| 9 | + </PropertyGroup> | ||
| 10 | + | ||
| 11 | + <ItemGroup> | ||
| 12 | + <PackageReference Include="CommandLineParser" Version="2.9.1" /> | ||
| 13 | + <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" /> | ||
| 14 | + </ItemGroup> | ||
| 15 | + | ||
| 16 | +</Project> |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +if [ ! -d ./sherpa-onnx-nemo-ctc-en-conformer-medium ]; then | ||
| 4 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 5 | + cd sherpa-onnx-nemo-ctc-en-conformer-medium | ||
| 6 | + git lfs pull --include "*.onnx" | ||
| 7 | + cd .. | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +dotnet run \ | ||
| 11 | + --tokens=./sherpa-onnx-nemo-ctc-en-conformer-medium/tokens.txt \ | ||
| 12 | + --nemo-ctc=./sherpa-onnx-nemo-ctc-en-conformer-medium/model.onnx \ | ||
| 13 | + --num-threads=1 \ | ||
| 14 | + --files ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/0.wav \ | ||
| 15 | + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/1.wav \ | ||
| 16 | + ./sherpa-onnx-nemo-ctc-en-conformer-medium/test_wavs/8k.wav |
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +if [ ! -d ./sherpa-onnx-paraformer-zh-2023-03-28 ]; then | ||
| 4 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 5 | + cd sherpa-onnx-paraformer-zh-2023-03-28 | ||
| 6 | + git lfs pull --include "*.onnx" | ||
| 7 | + cd .. | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +dotnet run \ | ||
| 11 | + --tokens=./sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt \ | ||
| 12 | + --paraformer=./sherpa-onnx-paraformer-zh-2023-03-28/model.onnx \ | ||
| 13 | + --num-threads=2 \ | ||
| 14 | + --files ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav \ | ||
| 15 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/1.wav \ | ||
| 16 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/2.wav \ | ||
| 17 | + ./sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/8k.wav |
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# | ||
| 3 | +if [ ! -d ./sherpa-onnx-zipformer-en-2023-04-01 ]; then | ||
| 4 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-en-2023-04-01 | ||
| 5 | + cd sherpa-onnx-zipformer-en-2023-04-01 | ||
| 6 | + git lfs pull --include "*.onnx" | ||
| 7 | + cd .. | ||
| 8 | +fi | ||
| 9 | + | ||
| 10 | +dotnet run \ | ||
| 11 | + --tokens=./sherpa-onnx-zipformer-en-2023-04-01/tokens.txt \ | ||
| 12 | + --encoder=./sherpa-onnx-zipformer-en-2023-04-01/encoder-epoch-99-avg-1.onnx \ | ||
| 13 | + --decoder=./sherpa-onnx-zipformer-en-2023-04-01/decoder-epoch-99-avg-1.onnx \ | ||
| 14 | + --joiner=./sherpa-onnx-zipformer-en-2023-04-01/joiner-epoch-99-avg-1.onnx \ | ||
| 15 | + --num-threads=2 \ | ||
| 16 | + --decoding-method=modified_beam_search \ | ||
| 17 | + --files ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/0.wav \ | ||
| 18 | + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/1.wav \ | ||
| 19 | + ./sherpa-onnx-zipformer-en-2023-04-01/test_wavs/8k.wav |
| 1 | +// Copyright (c) 2023 Xiaomi Corporation | ||
| 2 | +// Copyright (c) 2023 by manyeyes | ||
| 3 | +// | ||
| 4 | +// This file shows how to use a streaming model to decode files | ||
| 5 | +// Please refer to | ||
| 6 | +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html | ||
| 7 | +// to download streaming models | ||
| 8 | + | ||
| 9 | +using CommandLine.Text; | ||
| 10 | +using CommandLine; | ||
| 11 | +using SherpaOnnx; | ||
| 12 | +using System.Collections.Generic; | ||
| 13 | +using System.Linq; | ||
| 14 | +using System; | ||
| 15 | + | ||
| 16 | +class OnlineDecodeFiles | ||
| 17 | +{ | ||
| 18 | + class Options | ||
| 19 | + { | ||
| 20 | + [Option(Required = true, HelpText = "Path to tokens.txt")] | ||
| 21 | + public string Tokens { get; set; } | ||
| 22 | + | ||
| 23 | + [Option(Required = true, HelpText = "Path to encoder.onnx")] | ||
| 24 | + public string Encoder { get; set; } | ||
| 25 | + | ||
| 26 | + [Option(Required = true, HelpText = "Path to decoder.onnx")] | ||
| 27 | + public string Decoder { get; set; } | ||
| 28 | + | ||
| 29 | + [Option(Required = true, HelpText = "Path to joiner.onnx")] | ||
| 30 | + public string Joiner { get; set; } | ||
| 31 | + | ||
| 32 | + [Option("num-threads", Required = false, Default = 1, HelpText = "Number of threads for computation")] | ||
| 33 | + public int NumThreads { get; set; } | ||
| 34 | + | ||
| 35 | + [Option("decoding-method", Required = false, Default = "greedy_search", | ||
| 36 | + HelpText = "Valid decoding methods are: greedy_search, modified_beam_search")] | ||
| 37 | + public string DecodingMethod { get; set; } | ||
| 38 | + | ||
| 39 | + [Option(Required = false, Default = false, HelpText = "True to show model info during loading")] | ||
| 40 | + public bool Debug { get; set; } | ||
| 41 | + | ||
| 42 | + [Option("sample-rate", Required = false, Default = 16000, HelpText = "Sample rate of the data used to train the model")] | ||
| 43 | + public int SampleRate { get; set; } | ||
| 44 | + | ||
| 45 | + [Option("max-active-paths", Required = false, Default = 4, | ||
| 46 | + HelpText = @"Used only when --decoding-method is modified_beam_search. | ||
| 47 | +It specifies the number of active paths to keep during the search")] | ||
| 48 | + public int MaxActivePaths { get; set; } | ||
| 49 | + | ||
| 50 | + [Option("enable-endpoint", Required = false, Default = false, | ||
| 51 | + HelpText = "True to enable endpoint detection.")] | ||
| 52 | + public bool EnableEndpoint { get; set; } | ||
| 53 | + | ||
| 54 | + [Option("rule1-min-trailing-silence", Required = false, Default = 2.4F, | ||
| 55 | + HelpText = @"An endpoint is detected if trailing silence in seconds is | ||
| 56 | +larger than this value even if nothing has been decoded. Used only when --enable-endpoint is true.")] | ||
| 57 | + public float Rule1MinTrailingSilence { get; set; } | ||
| 58 | + | ||
| 59 | + [Option("rule2-min-trailing-silence", Required = false, Default = 1.2F, | ||
| 60 | + HelpText = @"An endpoint is detected if trailing silence in seconds is | ||
| 61 | +larger than this value after something that is not blank has been decoded. Used | ||
| 62 | +only when --enable-endpoint is true.")] | ||
| 63 | + public float Rule2MinTrailingSilence { get; set; } | ||
| 64 | + | ||
| 65 | + [Option("rule3-min-utterance-length", Required = false, Default = 20.0F, | ||
| 66 | + HelpText = @"An endpoint is detected if the utterance in seconds is | ||
| 67 | +larger than this value. Used only when --enable-endpoint is true.")] | ||
| 68 | + public float Rule3MinUtteranceLength { get; set; } | ||
| 69 | + | ||
| 70 | + [Option("files", Required = true, HelpText = "Audio files for decoding")] | ||
| 71 | + public IEnumerable<string> Files { get; set; } | ||
| 72 | + | ||
| 73 | + } | ||
| 74 | + | ||
| 75 | + static void Main(string[] args) /* Entry point: parses args into Options, then either runs recognition or prints help. */ | ||
| 76 | + { | ||
| 77 | + var parser = new CommandLine.Parser(with => with.HelpWriter = null); /* HelpWriter is set to null; presumably this silences the parser's built-in help so DisplayHelp can print its own (confirm against CommandLineParser docs). */ | ||
| 78 | + var parserResult = parser.ParseArguments<Options>(args); | ||
| 79 | + | ||
| 80 | + parserResult | ||
| 81 | + .WithParsed<Options>(options => Run(options)) | ||
| 82 | + .WithNotParsed(errs => DisplayHelp(parserResult, errs)); | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + private static void DisplayHelp<T>(ParserResult<T> result, IEnumerable<Error> errs) /* Prints a usage example followed by the auto-generated option list and parsing errors. */ | ||
| 86 | + { | ||
| 87 | + string usage = @" | ||
| 88 | +dotnet run \ | ||
| 89 | + --tokens=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ | ||
| 90 | + --encoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx \ | ||
| 91 | + --decoder=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx \ | ||
| 92 | + --joiner=./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx \ | ||
| 93 | + --num-threads=2 \ | ||
| 94 | + --decoding-method=modified_beam_search \ | ||
| 95 | + --debug=false \ | ||
| 96 | + --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav \ | ||
| 97 | + ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav | ||
| 98 | + | ||
| 99 | +Please refer to | ||
| 100 | +https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html | ||
| 101 | +to download pre-trained streaming models. | ||
| 102 | +"; | ||
| 103 | + | ||
| 104 | + var helpText = HelpText.AutoBuild(result, h => | ||
| 105 | + { | ||
| 106 | + h.AdditionalNewLineAfterOption = false; | ||
| 107 | + h.Heading = usage; | ||
| 108 | + h.Copyright = "Copyright (c) 2023 Xiaomi Corporation"; | ||
| 109 | + return HelpText.DefaultParsingErrorsHandler(result, h); | ||
| 110 | + }, e => e); | ||
| 111 | + Console.WriteLine(helpText); | ||
| 112 | + } | ||
| 113 | + | ||
| 114 | + private static void Run(Options options) | ||
| 115 | + { | ||
| 116 | + OnlineRecognizerConfig config = new OnlineRecognizerConfig(); | ||
| 117 | + config.FeatConfig.SampleRate = options.SampleRate; | ||
| 118 | + | ||
| 119 | + // All models from icefall use a feature dim of 80. | ||
| 120 | + // You can change it if your model has a different feature dim. | ||
| 121 | + config.FeatConfig.FeatureDim = 80; | ||
| 122 | + | ||
| 123 | + config.TransducerModelConfig.Encoder = options.Encoder; | ||
| 124 | + config.TransducerModelConfig.Decoder = options.Decoder; | ||
| 125 | + config.TransducerModelConfig.Joiner = options.Joiner; | ||
| 126 | + config.TransducerModelConfig.Tokens = options.Tokens; | ||
| 127 | + config.TransducerModelConfig.NumThreads = options.NumThreads; | ||
| 128 | + config.TransducerModelConfig.Debug = options.Debug ? 1 : 0; | ||
| 129 | + | ||
| 130 | + config.DecodingMethod = options.DecodingMethod; | ||
| 131 | + config.MaxActivePaths = options.MaxActivePaths; | ||
| 132 | + config.EnableEndpoint = options.EnableEndpoint ? 1 : 0; | ||
| 133 | + | ||
| 134 | + config.Rule1MinTrailingSilence = options.Rule1MinTrailingSilence; | ||
| 135 | + config.Rule2MinTrailingSilence = options.Rule2MinTrailingSilence; | ||
| 136 | + config.Rule3MinUtteranceLength = options.Rule3MinUtteranceLength; | ||
| 137 | + | ||
| 138 | + OnlineRecognizer recognizer = new OnlineRecognizer(config); | ||
| 139 | + | ||
| 140 | + string[] files = options.Files.ToArray(); | ||
| 141 | + | ||
| 142 | + // We create a separate stream for each file | ||
| 143 | + List<OnlineStream> streams = new List<OnlineStream>(); | ||
| 144 | + streams.EnsureCapacity(files.Length); | ||
| 145 | + | ||
| 146 | + for (int i = 0; i != files.Length; ++i) | ||
| 147 | + { | ||
| 148 | + OnlineStream s = recognizer.CreateStream(); | ||
| 149 | + | ||
| 150 | + WaveReader waveReader = new WaveReader(files[i]); | ||
| 151 | + s.AcceptWaveform(waveReader.SampleRate, waveReader.Samples); | ||
| 152 | + | ||
| 153 | + float[] tailPadding = new float[(int)(waveReader.SampleRate * 0.3)]; | ||
| 154 | + s.AcceptWaveform(waveReader.SampleRate, tailPadding); | ||
| 155 | + s.InputFinished(); | ||
| 156 | + | ||
| 157 | + streams.Add(s); | ||
| 158 | + } | ||
| 159 | + | ||
| 160 | + while (true) | ||
| 161 | + { | ||
| 162 | + var readyStreams = streams.Where(s => recognizer.IsReady(s)); | ||
| 163 | + if (!readyStreams.Any()) | ||
| 164 | + { | ||
| 165 | + break; | ||
| 166 | + } | ||
| 167 | + | ||
| 168 | + recognizer.Decode(readyStreams); | ||
| 169 | + } | ||
| 170 | + | ||
| 171 | + // display results | ||
| 172 | + for (int i = 0; i != files.Length; ++i) | ||
| 173 | + { | ||
| 174 | + var text = recognizer.GetResult(streams[i]).Text; | ||
| 175 | + Console.WriteLine("--------------------"); | ||
| 176 | + Console.WriteLine(files[i]); | ||
| 177 | + Console.WriteLine(text); | ||
| 178 | + } | ||
| 179 | + Console.WriteLine("--------------------"); | ||
| 180 | + } | ||
| 181 | +} |
| 1 | +// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +using System; | ||
| 3 | +using System.IO; | ||
| 4 | + | ||
| 5 | +using System.Runtime.InteropServices; | ||
| 6 | + | ||
| 7 | +namespace SherpaOnnx | ||
| 8 | +{ | ||
| 9 | + | ||
| 10 | + [StructLayout(LayoutKind.Sequential)] | ||
| 11 | + public struct WaveHeader /* 44-byte WAVE file header; fields are declared in on-disk order so the raw bytes can be marshalled straight into this struct. */ | ||
| 12 | + { | ||
| 13 | + public Int32 ChunkID; | ||
| 14 | + public Int32 ChunkSize; | ||
| 15 | + public Int32 Format; | ||
| 16 | + public Int32 SubChunk1ID; | ||
| 17 | + public Int32 SubChunk1Size; | ||
| 18 | + public Int16 AudioFormat; | ||
| 19 | + public Int16 NumChannels; | ||
| 20 | + public Int32 SampleRate; | ||
| 21 | + public Int32 ByteRate; | ||
| 22 | + public Int16 BlockAlign; | ||
| 23 | + public Int16 BitsPerSample; | ||
| 24 | + public Int32 SubChunk2ID; | ||
| 25 | + public Int32 SubChunk2Size; | ||
| 26 | + | ||
| 27 | + public bool Validate() /* Returns false, after printing the reason to the console, on the first check that fails; true if every check passes. */ | ||
| 28 | + { | ||
| 29 | + if (ChunkID != 0x46464952) /* F F I R, i.e. the bytes "RIFF" read as a little-endian Int32 */ | ||
| 30 | + { | ||
| 31 | + Console.WriteLine($"Invalid chunk ID: 0x{ChunkID:X}. Expect 0x46464952"); | ||
| 32 | + return false; | ||
| 33 | + } | ||
| 34 | + | ||
| 35 | + // E V A W, i.e. the bytes "WAVE" read as a little-endian Int32 | ||
| 36 | + if (Format != 0x45564157) | ||
| 37 | + { | ||
| 38 | + Console.WriteLine($"Invalid format: 0x{Format:X}. Expect 0x45564157"); | ||
| 39 | + return false; | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + // ' ' t m f, i.e. the bytes "fmt " read as a little-endian Int32 | ||
| 43 | + if (SubChunk1ID != 0x20746d66) | ||
| 44 | + { | ||
| 45 | + Console.WriteLine($"Invalid SubChunk1ID: 0x{SubChunk1ID:X}. Expect 0x20746d66"); | ||
| 46 | + return false; | ||
| 47 | + } | ||
| 48 | + | ||
| 49 | + if (SubChunk1Size != 16) | ||
| 50 | + { | ||
| 51 | + Console.WriteLine($"Invalid SubChunk1Size: {SubChunk1Size}. Expect 16"); | ||
| 52 | + return false; | ||
| 53 | + } | ||
| 54 | + | ||
| 55 | + if (AudioFormat != 1) | ||
| 56 | + { | ||
| 57 | + Console.WriteLine($"Invalid AudioFormat: {AudioFormat}. Expect 1"); | ||
| 58 | + return false; | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + if (NumChannels != 1) | ||
| 62 | + { | ||
| 63 | + Console.WriteLine($"Invalid NumChannels: {NumChannels}. Expect 1"); | ||
| 64 | + return false; | ||
| 65 | + } | ||
| 66 | + | ||
| 67 | + if (ByteRate != (SampleRate * NumChannels * BitsPerSample / 8)) | ||
| 68 | + { | ||
| 69 | + Console.WriteLine($"Invalid byte rate: {ByteRate}."); | ||
| 70 | + return false; | ||
| 71 | + } | ||
| 72 | + | ||
| 73 | + if (BlockAlign != (NumChannels * BitsPerSample / 8)) | ||
| 74 | + { | ||
| 75 | + Console.WriteLine($"Invalid block align: {BlockAlign}."); | ||
| 76 | + return false; | ||
| 77 | + } | ||
| 78 | + | ||
| 79 | + if (BitsPerSample != 16) | ||
| 80 | + { // we support only 16 bits per sample | ||
| 81 | + Console.WriteLine($"Invalid bits per sample: {BitsPerSample}. Expect 16"); | ||
| 82 | + return false; | ||
| 83 | + } | ||
| 84 | + | ||
| 85 | + return true; | ||
| 86 | + } | ||
| 87 | + } | ||
| 88 | + | ||
| 89 | + // It supports only 16-bit, single channel WAVE format. | ||
| 90 | + // The sample rate can be any value. | ||
| 91 | + public class WaveReader /* Loads a whole wave file into memory; Samples holds the audio as floats and SampleRate comes from the file header. */ | ||
| 92 | + { | ||
| 93 | + public WaveReader(String fileName) /* Throws ApplicationException if the file is missing, too short, or fails WaveHeader.Validate(). */ | ||
| 94 | + { | ||
| 95 | + if (!File.Exists(fileName)) | ||
| 96 | + { | ||
| 97 | + throw new ApplicationException($"{fileName} does not exist!"); | ||
| 98 | + } | ||
| 99 | + | ||
| 100 | + using (var stream = File.OpenRead(fileName)) /* read-only access, so read-only or shared files can be opened */ | ||
| 101 | + { | ||
| 102 | + using (var reader = new BinaryReader(stream)) | ||
| 103 | + { | ||
| 104 | + _header = ReadHeader(reader); | ||
| 105 | + | ||
| 106 | + if (!_header.Validate()) | ||
| 107 | + { | ||
| 108 | + throw new ApplicationException($"Invalid wave file {fileName}"); | ||
| 109 | + } | ||
| 110 | + | ||
| 111 | + SkipMetaData(reader); | ||
| 112 | + | ||
| 113 | + // now read samples | ||
| 114 | + // _header.SubChunk2Size contains number of bytes in total. | ||
| 115 | + // each sample is int16; size by the bytes actually read so truncated or odd-sized data cannot overrun the copy | ||
| 116 | + byte[] buffer = reader.ReadBytes(_header.SubChunk2Size); | ||
| 117 | + short[] samples_int16 = new short[buffer.Length / 2]; | ||
| 118 | + Buffer.BlockCopy(buffer, 0, samples_int16, 0, samples_int16.Length * sizeof(short)); | ||
| 119 | + | ||
| 120 | + _samples = new float[samples_int16.Length]; | ||
| 121 | + | ||
| 122 | + for (var i = 0; i < samples_int16.Length; ++i) | ||
| 123 | + { | ||
| 124 | + _samples[i] = samples_int16[i] / 32768.0F; | ||
| 125 | + } | ||
| 126 | + } | ||
| 127 | + } | ||
| 128 | + } | ||
| 129 | + | ||
| 130 | + private static WaveHeader ReadHeader(BinaryReader reader) /* Reads the fixed-size header bytes and marshals them into a WaveHeader. */ | ||
| 131 | + { | ||
| 132 | + byte[] bytes = reader.ReadBytes(Marshal.SizeOf(typeof(WaveHeader))); | ||
| 133 | + if (bytes.Length != Marshal.SizeOf(typeof(WaveHeader))) throw new ApplicationException("Wave file is too short to contain a header"); /* otherwise PtrToStructure would read past the pinned array */ | ||
| 134 | + GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned); | ||
| 135 | + WaveHeader header = (WaveHeader)Marshal.PtrToStructure(handle.AddrOfPinnedObject(), typeof(WaveHeader))!; | ||
| 136 | + handle.Free(); | ||
| 137 | + | ||
| 138 | + return header; | ||
| 139 | + } | ||
| 140 | + | ||
| 141 | + private void SkipMetaData(BinaryReader reader) /* Skips chunks until one whose ID is 0x61746164 ("data") is found, then stores that chunk's ID and size in _header. */ | ||
| 142 | + { | ||
| 143 | + var bs = reader.BaseStream; | ||
| 144 | + | ||
| 145 | + Int32 subChunk2ID = _header.SubChunk2ID; | ||
| 146 | + Int32 subChunk2Size = _header.SubChunk2Size; | ||
| 147 | + | ||
| 148 | + while (bs.Position != bs.Length && subChunk2ID != 0x61746164) | ||
| 149 | + { | ||
| 150 | + bs.Seek(subChunk2Size, SeekOrigin.Current); | ||
| 151 | + subChunk2ID = reader.ReadInt32(); | ||
| 152 | + subChunk2Size = reader.ReadInt32(); | ||
| 153 | + } | ||
| 154 | + _header.SubChunk2ID = subChunk2ID; | ||
| 155 | + _header.SubChunk2Size = subChunk2Size; | ||
| 156 | + } | ||
| 157 | + | ||
| 158 | + private WaveHeader _header; | ||
| 159 | + | ||
| 160 | + // Samples are normalized to the range [-1, 1] | ||
| 161 | + private float[] _samples; | ||
| 162 | + | ||
| 163 | + public int SampleRate => _header.SampleRate; | ||
| 164 | + public float[] Samples => _samples; | ||
| 165 | + | ||
| 166 | + public static void Test(String fileName) | ||
| 167 | + { | ||
| 168 | + WaveReader reader = new WaveReader(fileName); | ||
| 169 | + Console.WriteLine($"samples length: {reader.Samples.Length}"); | ||
| 170 | + Console.WriteLine($"samples rate: {reader.SampleRate}"); | ||
| 171 | + } | ||
| 172 | + } | ||
| 173 | + | ||
| 174 | +} |
| 1 | <Project Sdk="Microsoft.NET.Sdk"> | 1 | <Project Sdk="Microsoft.NET.Sdk"> |
| 2 | 2 | ||
| 3 | <PropertyGroup> | 3 | <PropertyGroup> |
| 4 | + <OutputType>Exe</OutputType> | ||
| 4 | <TargetFramework>net6.0</TargetFramework> | 5 | <TargetFramework>net6.0</TargetFramework> |
| 6 | + <RootNamespace>online_decode_files</RootNamespace> | ||
| 5 | <ImplicitUsings>enable</ImplicitUsings> | 7 | <ImplicitUsings>enable</ImplicitUsings> |
| 6 | <Nullable>enable</Nullable> | 8 | <Nullable>enable</Nullable> |
| 7 | - <AllowUnsafeBlocks>true</AllowUnsafeBlocks> | ||
| 8 | </PropertyGroup> | 9 | </PropertyGroup> |
| 9 | 10 | ||
| 11 | + <ItemGroup> | ||
| 12 | + <PackageReference Include="CommandLineParser" Version="2.9.1" /> | ||
| 13 | + <PackageReference Include="org.k2fsa.sherpa.onnx" Version="*" /> | ||
| 14 | + </ItemGroup> | ||
| 15 | + | ||
| 10 | </Project> | 16 | </Project> |
dotnet-examples/online-decode-files/run.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | + | ||
| 3 | +# Please refer to | ||
| 4 | +# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english | ||
| 5 | +# to download the model files | ||
| 6 | + | ||
| 7 | +if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then | ||
| 8 | + GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 9 | + cd sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 | ||
| 10 | + git lfs pull --include "*.onnx" | ||
| 11 | + cd .. | ||
| 12 | +fi | ||
| 13 | + | ||
| 14 | +dotnet run -c Release \ | ||
| 15 | + --tokens ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt \ | ||
| 16 | + --encoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx \ | ||
| 17 | + --decoder ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.int8.onnx \ | ||
| 18 | + --joiner ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx \ | ||
| 19 | + --decoding-method greedy_search \ | ||
| 20 | + --files ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav \ | ||
| 21 | + ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav |
dotnet-examples/sherpa-onnx.sln
0 → 100644
| 1 | + | ||
| 2 | +Microsoft Visual Studio Solution File, Format Version 12.00 | ||
| 3 | +# Visual Studio Version 17 | ||
| 4 | +VisualStudioVersion = 17.0.31903.59 | ||
| 5 | +MinimumVisualStudioVersion = 10.0.40219.1 | ||
| 6 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "online-decode-files", "online-decode-files\online-decode-files.csproj", "{45307474-BECB-4ABE-9388-D01D55A1A9BE}" | ||
| 7 | +EndProject | ||
| 8 | +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "offline-decode-files", "offline-decode-files\offline-decode-files.csproj", "{2DAB152C-9E24-47A0-9DB0-781297ECE458}" | ||
| 9 | +EndProject | ||
| 10 | +Global | ||
| 11 | + GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
| 12 | + Debug|Any CPU = Debug|Any CPU | ||
| 13 | + Release|Any CPU = Release|Any CPU | ||
| 14 | + EndGlobalSection | ||
| 15 | + GlobalSection(SolutionProperties) = preSolution | ||
| 16 | + HideSolutionNode = FALSE | ||
| 17 | + EndGlobalSection | ||
| 18 | + GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
| 19 | + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 20 | + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 21 | + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 22 | + {45307474-BECB-4ABE-9388-D01D55A1A9BE}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 23 | + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
| 24 | + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
| 25 | + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
| 26 | + {2DAB152C-9E24-47A0-9DB0-781297ECE458}.Release|Any CPU.Build.0 = Release|Any CPU | ||
| 27 | + EndGlobalSection | ||
| 28 | +EndGlobal |
scripts/dotnet/README.md
0 → 100644
| 1 | +# Introduction | ||
| 2 | + | ||
| 3 | +[sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx) is an open-source | ||
| 4 | +real-time speech recognition toolkit developed | ||
| 5 | +by the Next-gen Kaldi team. | ||
| 6 | + | ||
| 7 | +It supports streaming recognition on a variety of | ||
| 8 | +platforms such as Android, iOS, Raspberry Pi, Linux, Windows, macOS, etc. | ||
| 9 | + | ||
| 10 | +It does not require an Internet connection during recognition. | ||
| 11 | + | ||
| 12 | +See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html | ||
| 13 | +for details. | ||
| 14 | + | ||
| 15 | +Please see | ||
| 16 | +https://github.com/k2-fsa/sherpa-onnx/tree/dot-net/dotnet-examples | ||
| 17 | +for how to use the C# APIs of this package. |
scripts/dotnet/generate.py
0 → 100755
| 1 | +#!/usr/bin/env python3 | ||
| 2 | +# Copyright (c) 2023 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +import argparse | ||
| 5 | +import re | ||
| 6 | +from pathlib import Path | ||
| 7 | + | ||
| 8 | +import jinja2 | ||
| 9 | + | ||
| 10 | +SHERPA_ONNX_DIR = Path(__file__).resolve().parent.parent.parent | ||
| 11 | + | ||
| 12 | + | ||
| 13 | +def get_version(): | ||
| 14 | + cmake_file = SHERPA_ONNX_DIR / "CMakeLists.txt" | ||
| 15 | + with open(cmake_file) as f: | ||
| 16 | + content = f.read() | ||
| 17 | + | ||
| 18 | + version = re.search(r"set\(SHERPA_ONNX_VERSION (.*)\)", content).group(1) | ||
| 19 | + return version.strip('"') | ||
| 20 | + | ||
| 21 | + | ||
| 22 | +def read_proj_file(filename): | ||
| 23 | + with open(filename) as f: | ||
| 24 | + return f.read() | ||
| 25 | + | ||
| 26 | + | ||
| 27 | +def get_dict(): | ||
| 28 | + version = get_version() | ||
| 29 | + return { | ||
| 30 | + "version": get_version(), | ||
| 31 | + } | ||
| 32 | + | ||
| 33 | + | ||
| 34 | +def process_linux(s): | ||
| 35 | + libs = [ | ||
| 36 | + "libkaldi-native-fbank-core.so", | ||
| 37 | + "libonnxruntime.so.1.14.0", | ||
| 38 | + "libsherpa-onnx-c-api.so", | ||
| 39 | + "libsherpa-onnx-core.so", | ||
| 40 | + ] | ||
| 41 | + prefix = f"{SHERPA_ONNX_DIR}/linux/sherpa_onnx/lib/" | ||
| 42 | + libs = [prefix + lib for lib in libs] | ||
| 43 | + libs = "\n ;".join(libs) | ||
| 44 | + | ||
| 45 | + d = get_dict() | ||
| 46 | + d["dotnet_rid"] = "linux-x64" | ||
| 47 | + d["libs"] = libs | ||
| 48 | + | ||
| 49 | + environment = jinja2.Environment() | ||
| 50 | + template = environment.from_string(s) | ||
| 51 | + s = template.render(**d) | ||
| 52 | + with open("./linux/sherpa-onnx.runtime.csproj", "w") as f: | ||
| 53 | + f.write(s) | ||
| 54 | + | ||
| 55 | + | ||
| 56 | +def process_macos(s): | ||
| 57 | + libs = [ | ||
| 58 | + "libkaldi-native-fbank-core.dylib", | ||
| 59 | + "libonnxruntime.1.14.0.dylib", | ||
| 60 | + "libsherpa-onnx-c-api.dylib", | ||
| 61 | + "libsherpa-onnx-core.dylib", | ||
| 62 | + ] | ||
| 63 | + prefix = f"{SHERPA_ONNX_DIR}/macos/sherpa_onnx/lib/" | ||
| 64 | + libs = [prefix + lib for lib in libs] | ||
| 65 | + libs = "\n ;".join(libs) | ||
| 66 | + | ||
| 67 | + d = get_dict() | ||
| 68 | + d["dotnet_rid"] = "osx-x64" | ||
| 69 | + d["libs"] = libs | ||
| 70 | + | ||
| 71 | + environment = jinja2.Environment() | ||
| 72 | + template = environment.from_string(s) | ||
| 73 | + s = template.render(**d) | ||
| 74 | + with open("./macos/sherpa-onnx.runtime.csproj", "w") as f: | ||
| 75 | + f.write(s) | ||
| 76 | + | ||
| 77 | + | ||
| 78 | +def process_windows(s): | ||
| 79 | + libs = [ | ||
| 80 | + "kaldi-native-fbank-core.dll", | ||
| 81 | + "onnxruntime.dll", | ||
| 82 | + "sherpa-onnx-c-api.dll", | ||
| 83 | + "sherpa-onnx-core.dll", | ||
| 84 | + ] | ||
| 85 | + prefix = f"{SHERPA_ONNX_DIR}/windows/sherpa_onnx/lib/" | ||
| 86 | + libs = [prefix + lib for lib in libs] | ||
| 87 | + libs = "\n ;".join(libs) | ||
| 88 | + | ||
| 89 | + d = get_dict() | ||
| 90 | + d["dotnet_rid"] = "win-x64" | ||
| 91 | + d["libs"] = libs | ||
| 92 | + | ||
| 93 | + environment = jinja2.Environment() | ||
| 94 | + template = environment.from_string(s) | ||
| 95 | + s = template.render(**d) | ||
| 96 | + with open("./windows/sherpa-onnx.runtime.csproj", "w") as f: | ||
| 97 | + f.write(s) | ||
| 98 | + | ||
| 99 | + | ||
| 100 | +def main(): | ||
| 101 | + s = read_proj_file("./sherpa-onnx.csproj.runtime.in") | ||
| 102 | + process_macos(s) | ||
| 103 | + process_linux(s) | ||
| 104 | + process_windows(s) | ||
| 105 | + | ||
| 106 | + s = read_proj_file("./sherpa-onnx.csproj.in") | ||
| 107 | + d = get_dict() | ||
| 108 | + d["packages_dir"] = str(SHERPA_ONNX_DIR / "scripts/dotnet/packages") | ||
| 109 | + | ||
| 110 | + environment = jinja2.Environment() | ||
| 111 | + template = environment.from_string(s) | ||
| 112 | + s = template.render(**d) | ||
| 113 | + with open("./all/sherpa-onnx.csproj", "w") as f: | ||
| 114 | + f.write(s) | ||
| 115 | + | ||
| 116 | + | ||
| 117 | +if __name__ == "__main__": | ||
| 118 | + main() |
scripts/dotnet/offline.cs
0 → 100644
| 1 | +/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +/// Copyright (c) 2023 by manyeyes | ||
| 3 | + | ||
| 4 | +using System.Linq; | ||
| 5 | +using System.Collections.Generic; | ||
| 6 | +using System.Runtime.InteropServices; | ||
| 7 | +using System; | ||
| 8 | + | ||
| 9 | +namespace SherpaOnnx | ||
| 10 | +{ | ||
| 11 | + | ||
| 12 | + [StructLayout(LayoutKind.Sequential)] | ||
| 13 | + public struct OfflineTransducerModelConfig | ||
| 14 | + { | ||
| 15 | + public OfflineTransducerModelConfig() | ||
| 16 | + { | ||
| 17 | + Encoder = ""; | ||
| 18 | + Decoder = ""; | ||
| 19 | + Joiner = ""; | ||
| 20 | + } | ||
| 21 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 22 | + public string Encoder; | ||
| 23 | + | ||
| 24 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 25 | + public string Decoder; | ||
| 26 | + | ||
| 27 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 28 | + public string Joiner; | ||
| 29 | + } | ||
| 30 | + | ||
| 31 | + [StructLayout(LayoutKind.Sequential)] | ||
| 32 | + public struct OfflineParaformerModelConfig | ||
| 33 | + { | ||
| 34 | + public OfflineParaformerModelConfig() | ||
| 35 | + { | ||
| 36 | + Model = ""; | ||
| 37 | + } | ||
| 38 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 39 | + public string Model; | ||
| 40 | + } | ||
| 41 | + | ||
| 42 | + [StructLayout(LayoutKind.Sequential)] | ||
| 43 | + public struct OfflineNemoEncDecCtcModelConfig | ||
| 44 | + { | ||
| 45 | + public OfflineNemoEncDecCtcModelConfig() | ||
| 46 | + { | ||
| 47 | + Model = ""; | ||
| 48 | + } | ||
| 49 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 50 | + public string Model; | ||
| 51 | + } | ||
| 52 | + | ||
| 53 | + [StructLayout(LayoutKind.Sequential)] | ||
| 54 | + public struct OfflineLMConfig | ||
| 55 | + { | ||
| 56 | + public OfflineLMConfig() | ||
| 57 | + { | ||
| 58 | + Model = ""; | ||
| 59 | + Scale = 0.5F; | ||
| 60 | + } | ||
| 61 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 62 | + public string Model; | ||
| 63 | + | ||
| 64 | + public float Scale; | ||
| 65 | + } | ||
| 66 | + | ||
| 67 | + [StructLayout(LayoutKind.Sequential)] | ||
| 68 | + public struct OfflineModelConfig | ||
| 69 | + { | ||
| 70 | + public OfflineModelConfig() | ||
| 71 | + { | ||
| 72 | + Transducer = new OfflineTransducerModelConfig(); | ||
| 73 | + Paraformer = new OfflineParaformerModelConfig(); | ||
| 74 | + NeMoCtc = new OfflineNemoEncDecCtcModelConfig(); | ||
| 75 | + Tokens = ""; | ||
| 76 | + NumThreads = 1; | ||
| 77 | + Debug = 0; | ||
| 78 | + } | ||
| 79 | + public OfflineTransducerModelConfig Transducer; | ||
| 80 | + public OfflineParaformerModelConfig Paraformer; | ||
| 81 | + public OfflineNemoEncDecCtcModelConfig NeMoCtc; | ||
| 82 | + | ||
| 83 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 84 | + public string Tokens; | ||
| 85 | + | ||
| 86 | + public int NumThreads; | ||
| 87 | + | ||
| 88 | + public int Debug; | ||
| 89 | + } | ||
| 90 | + | ||
| 91 | + [StructLayout(LayoutKind.Sequential)] | ||
| 92 | + public struct OfflineRecognizerConfig | ||
| 93 | + { | ||
| 94 | + public OfflineRecognizerConfig() | ||
| 95 | + { | ||
| 96 | + FeatConfig = new FeatureConfig(); | ||
| 97 | + ModelConfig = new OfflineModelConfig(); | ||
| 98 | + LmConfig = new OfflineLMConfig(); | ||
| 99 | + | ||
| 100 | + DecodingMethod = "greedy_search"; | ||
| 101 | + MaxActivePaths = 4; | ||
| 102 | + | ||
| 103 | + } | ||
| 104 | + public FeatureConfig FeatConfig; | ||
| 105 | + public OfflineModelConfig ModelConfig; | ||
| 106 | + public OfflineLMConfig LmConfig; | ||
| 107 | + | ||
| 108 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 109 | + public string DecodingMethod; | ||
| 110 | + | ||
| 111 | + public int MaxActivePaths; | ||
| 112 | + } | ||
| 113 | + | ||
| 114 | + public class OfflineRecognizerResult | ||
| 115 | + { | ||
| 116 | + public OfflineRecognizerResult(IntPtr handle) | ||
| 117 | + { | ||
| 118 | + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); | ||
| 119 | + _text = Marshal.PtrToStringUTF8(impl.Text); | ||
| 120 | + } | ||
| 121 | + | ||
| 122 | + [StructLayout(LayoutKind.Sequential)] | ||
| 123 | + struct Impl | ||
| 124 | + { | ||
| 125 | + public IntPtr Text; | ||
| 126 | + } | ||
| 127 | + | ||
| 128 | + private String _text; | ||
| 129 | + public String Text => _text; | ||
| 130 | + } | ||
| 131 | + | ||
| 132 | + public class OfflineStream : IDisposable | ||
| 133 | + { | ||
| 134 | + public OfflineStream(IntPtr p) | ||
| 135 | + { | ||
| 136 | + _handle = new HandleRef(this, p); | ||
| 137 | + } | ||
| 138 | + | ||
| 139 | + public void AcceptWaveform(int sampleRate, float[] samples) | ||
| 140 | + { | ||
| 141 | + AcceptWaveform(Handle, sampleRate, samples, samples.Length); | ||
| 142 | + } | ||
| 143 | + | ||
| 144 | + public OfflineRecognizerResult Result | ||
| 145 | + { | ||
| 146 | + get | ||
| 147 | + { | ||
| 148 | + IntPtr h = GetResult(_handle.Handle); | ||
| 149 | + OfflineRecognizerResult result = new OfflineRecognizerResult(h); | ||
| 150 | + DestroyResult(h); | ||
| 151 | + return result; | ||
| 152 | + } | ||
| 153 | + } | ||
| 154 | + | ||
| 155 | + ~OfflineStream() | ||
| 156 | + { | ||
| 157 | + Cleanup(); | ||
| 158 | + } | ||
| 159 | + | ||
| 160 | + public void Dispose() | ||
| 161 | + { | ||
| 162 | + Cleanup(); | ||
| 163 | + // Prevent the object from being placed on the | ||
| 164 | + // finalization queue | ||
| 165 | + System.GC.SuppressFinalize(this); | ||
| 166 | + } | ||
| 167 | + | ||
| 168 | + private void Cleanup() | ||
| 169 | + { | ||
| 170 | + DestroyOfflineStream(Handle); | ||
| 171 | + | ||
| 172 | + // Don't permit the handle to be used again. | ||
| 173 | + _handle = new HandleRef(this, IntPtr.Zero); | ||
| 174 | + } | ||
| 175 | + | ||
| 176 | + private HandleRef _handle; | ||
| 177 | + public IntPtr Handle => _handle.Handle; | ||
| 178 | + | ||
| 179 | + [DllImport(Dll.Filename)] | ||
| 180 | + private static extern void DestroyOfflineStream(IntPtr handle); | ||
| 181 | + | ||
| 182 | + [DllImport(Dll.Filename, EntryPoint = "AcceptWaveformOffline")] | ||
| 183 | + private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n); | ||
| 184 | + | ||
| 185 | + [DllImport(Dll.Filename, EntryPoint = "GetOfflineStreamResult")] | ||
| 186 | + private static extern IntPtr GetResult(IntPtr handle); | ||
| 187 | + | ||
| 188 | + [DllImport(Dll.Filename, EntryPoint = "DestroyOfflineRecognizerResult")] | ||
| 189 | + private static extern void DestroyResult(IntPtr handle); | ||
| 190 | + } | ||
| 191 | + | ||
| 192 | + public class OfflineRecognizer : IDisposable | ||
| 193 | + { | ||
| 194 | + public OfflineRecognizer(OfflineRecognizerConfig config) | ||
| 195 | + { | ||
| 196 | + IntPtr h = CreateOfflineRecognizer(ref config); | ||
| 197 | + _handle = new HandleRef(this, h); | ||
| 198 | + } | ||
| 199 | + | ||
| 200 | + public OfflineStream CreateStream() | ||
| 201 | + { | ||
| 202 | + IntPtr p = CreateOfflineStream(_handle.Handle); | ||
| 203 | + return new OfflineStream(p); | ||
| 204 | + } | ||
| 205 | + | ||
| 206 | + /// Feed all audio with stream.AcceptWaveform() before you call | ||
| 207 | + /// this method; OfflineRecognizer has no IsReady() to check. | ||
| 208 | + public void Decode(OfflineStream stream) | ||
| 209 | + { | ||
| 210 | + Decode(_handle.Handle, stream.Handle); | ||
| 211 | + } | ||
| 212 | + | ||
| 213 | + // The caller should ensure all passed streams are ready for decoding. | ||
| 214 | + public void Decode(IEnumerable<OfflineStream> streams) | ||
| 215 | + { | ||
| 216 | + IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); | ||
| 217 | + Decode(_handle.Handle, ptrs, ptrs.Length); | ||
| 218 | + } | ||
| 219 | + | ||
| 220 | + public void Dispose() | ||
| 221 | + { | ||
| 222 | + Cleanup(); | ||
| 223 | + // Prevent the object from being placed on the | ||
| 224 | + // finalization queue | ||
| 225 | + System.GC.SuppressFinalize(this); | ||
| 226 | + } | ||
| 227 | + | ||
| 228 | + ~OfflineRecognizer() | ||
| 229 | + { | ||
| 230 | + Cleanup(); | ||
| 231 | + } | ||
| 232 | + | ||
| 233 | + private void Cleanup() | ||
| 234 | + { | ||
| 235 | + DestroyOfflineRecognizer(_handle.Handle); | ||
| 236 | + | ||
| 237 | + // Don't permit the handle to be used again. | ||
| 238 | + _handle = new HandleRef(this, IntPtr.Zero); | ||
| 239 | + } | ||
| 240 | + | ||
| 241 | + private HandleRef _handle; | ||
| 242 | + | ||
| 243 | + [DllImport(Dll.Filename)] | ||
| 244 | + private static extern IntPtr CreateOfflineRecognizer(ref OfflineRecognizerConfig config); | ||
| 245 | + | ||
| 246 | + [DllImport(Dll.Filename)] | ||
| 247 | + private static extern void DestroyOfflineRecognizer(IntPtr handle); | ||
| 248 | + | ||
| 249 | + [DllImport(Dll.Filename)] | ||
| 250 | + private static extern IntPtr CreateOfflineStream(IntPtr handle); | ||
| 251 | + | ||
| 252 | + [DllImport(Dll.Filename, EntryPoint = "DecodeOfflineStream")] | ||
| 253 | + private static extern void Decode(IntPtr handle, IntPtr stream); | ||
| 254 | + | ||
| 255 | + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOfflineStreams")] | ||
| 256 | + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); | ||
| 257 | + } | ||
| 258 | + | ||
| 259 | +} |
scripts/dotnet/online.cs
0 → 100644
| 1 | +/// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 2 | +/// Copyright (c) 2023 by manyeyes | ||
| 3 | + | ||
| 4 | +using System.Linq; | ||
| 5 | +using System.Collections.Generic; | ||
| 6 | +using System.Runtime.InteropServices; | ||
| 7 | +using System; | ||
| 8 | + | ||
| 9 | +namespace SherpaOnnx | ||
| 10 | +{ | ||
| 11 | + internal static class Dll /* Holds the native library name used by the [DllImport] declarations in this namespace. */ | ||
| 12 | + { | ||
| 13 | + public const string Filename = "sherpa-onnx-c-api"; /* no extension or "lib" prefix here; presumably the runtime resolves the platform-specific file name (confirm). */ | ||
| 14 | + } | ||
| 15 | + | ||
| 16 | + [StructLayout(LayoutKind.Sequential)] /* marshalled to native code; keep field order in sync with c-api.h */ | ||
| 17 | + public struct OnlineTransducerModelConfig | ||
| 18 | + { | ||
| 19 | + public OnlineTransducerModelConfig() | ||
| 20 | + { | ||
| 21 | + Encoder = ""; | ||
| 22 | + Decoder = ""; | ||
| 23 | + Joiner = ""; | ||
| 24 | + Tokens = ""; | ||
| 25 | + NumThreads = 1; | ||
| 26 | + Debug = 0; | ||
| 27 | + } | ||
| 28 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 29 | + public string Encoder; /* encoder model file, e.g. encoder-xxx.onnx */ | ||
| 30 | + | ||
| 31 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 32 | + public string Decoder; /* decoder model file, e.g. decoder-xxx.onnx */ | ||
| 33 | + | ||
| 34 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 35 | + public string Joiner; /* joiner model file, e.g. joiner-xxx.onnx */ | ||
| 36 | + | ||
| 37 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 38 | + public string Tokens; /* token table, e.g. tokens.txt */ | ||
| 39 | + | ||
| 40 | + /// Number of threads used to run the neural network model. Defaults to 1. | ||
| 41 | + public int NumThreads; | ||
| 42 | + | ||
| 43 | + /// Non-zero to print debug information of the model; 0 (the default) to disable. | ||
| 44 | + public int Debug; | ||
| 45 | + } | ||
| 46 | + | ||
| 47 | + /// Feature extraction settings. It expects 16 kHz 16-bit single channel wave format. | ||
| 48 | + [StructLayout(LayoutKind.Sequential)] | ||
| 49 | + public struct FeatureConfig | ||
| 50 | + { | ||
| 51 | + public FeatureConfig() | ||
| 52 | + { | ||
| 53 | + SampleRate = 16000; | ||
| 54 | + FeatureDim = 80; | ||
| 55 | + } | ||
| 56 | + /// Sample rate of the input data. MUST match the one expected | ||
| 57 | + /// by the model. For instance, it should be 16000 for models provided | ||
| 58 | + /// by us. Defaults to 16000. | ||
| 59 | + public int SampleRate; | ||
| 60 | + | ||
| 61 | + /// Feature dimension of the model. Defaults to 80. | ||
| 62 | + /// For instance, it should be 80 for models provided by us. | ||
| 63 | + public int FeatureDim; | ||
| 64 | + } | ||
| 65 | + | ||
| 66 | + [StructLayout(LayoutKind.Sequential)] /* passed by reference to the native CreateOnlineRecognizer() */ | ||
| 67 | + public struct OnlineRecognizerConfig | ||
| 68 | + { | ||
| 69 | + public OnlineRecognizerConfig() | ||
| 70 | + { | ||
| 71 | + FeatConfig = new FeatureConfig(); | ||
| 72 | + TransducerModelConfig = new OnlineTransducerModelConfig(); | ||
| 73 | + DecodingMethod = "greedy_search"; | ||
| 74 | + MaxActivePaths = 4; | ||
| 75 | + EnableEndpoint = 0; | ||
| 76 | + Rule1MinTrailingSilence = 1.2F; | ||
| 77 | + Rule2MinTrailingSilence = 2.4F; | ||
| 78 | + Rule3MinUtteranceLength = 20.0F; | ||
| 79 | + } | ||
| 80 | + public FeatureConfig FeatConfig; | ||
| 81 | + public OnlineTransducerModelConfig TransducerModelConfig; | ||
| 82 | + | ||
| 83 | + [MarshalAs(UnmanagedType.LPStr)] | ||
| 84 | + public string DecodingMethod; /* defaults to greedy_search; modified_beam_search also uses MaxActivePaths */ | ||
| 85 | + | ||
| 86 | + /// Used only when DecodingMethod is modified_beam_search. | ||
| 87 | + /// Example value: 4 (which is also the default). | ||
| 88 | + public int MaxActivePaths; | ||
| 89 | + | ||
| 90 | + /// 0 (the default) to disable endpoint detection. | ||
| 91 | + /// A non-zero value to enable endpoint detection. | ||
| 92 | + public int EnableEndpoint; | ||
| 93 | + | ||
| 94 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 95 | + /// this value even if nothing has been decoded. | ||
| 96 | + /// Used only when EnableEndpoint is not 0. Defaults to 1.2. | ||
| 97 | + public float Rule1MinTrailingSilence; | ||
| 98 | + | ||
| 99 | + /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 100 | + /// this value after something that is not blank has been decoded. | ||
| 101 | + /// Used only when EnableEndpoint is not 0. Defaults to 2.4. | ||
| 102 | + public float Rule2MinTrailingSilence; | ||
| 103 | + | ||
| 104 | + /// An endpoint is detected if the utterance in seconds is larger than | ||
| 105 | + /// this value. | ||
| 106 | + /// Used only when EnableEndpoint is not 0. Defaults to 20. | ||
| 107 | + public float Rule3MinUtteranceLength; | ||
| 108 | + } | ||
| 109 | + | ||
| 110 | + public class OnlineRecognizerResult /* managed copy of a native recognition result */ | ||
| 111 | + { | ||
| 112 | + public OnlineRecognizerResult(IntPtr handle) /* handle: pointer to the native result struct */ | ||
| 113 | + { | ||
| 114 | + Impl impl = (Impl)Marshal.PtrToStructure(handle, typeof(Impl)); | ||
| 115 | + _text = Marshal.PtrToStringUTF8(impl.Text); /* copy the text out so this object does not depend on the native memory */ | ||
| 116 | + } | ||
| 117 | + | ||
| 118 | + [StructLayout(LayoutKind.Sequential)] | ||
| 119 | + struct Impl /* mirrors the C struct SherpaOnnxOnlineRecognizerResult, which holds a single text pointer */ | ||
| 120 | + { | ||
| 121 | + public IntPtr Text; | ||
| 122 | + } | ||
| 123 | + | ||
| 124 | + private String _text; | ||
| 125 | + public String Text => _text; /* the recognized text */ | ||
| 126 | + } | ||
| 127 | + | ||
| 128 | + public class OnlineStream : IDisposable /* owns one native online (streaming) stream */ | ||
| 129 | + { | ||
| 130 | + public OnlineStream(IntPtr p) /* p: native stream pointer, e.g. from OnlineRecognizer.CreateStream() */ | ||
| 131 | + { | ||
| 132 | + _handle = new HandleRef(this, p); | ||
| 133 | + } | ||
| 134 | + | ||
| 135 | + public void AcceptWaveform(int sampleRate, float[] samples) /* forwards the whole array to the native stream */ | ||
| 136 | + { | ||
| 137 | + AcceptWaveform(Handle, sampleRate, samples, samples.Length); | ||
| 138 | + } | ||
| 139 | + | ||
| 140 | + public void InputFinished() | ||
| 141 | + { | ||
| 142 | + InputFinished(Handle); | ||
| 143 | + } | ||
| 144 | + | ||
| 145 | + ~OnlineStream() | ||
| 146 | + { | ||
| 147 | + Cleanup(); | ||
| 148 | + } | ||
| 149 | + | ||
| 150 | + public void Dispose() | ||
| 151 | + { | ||
| 152 | + Cleanup(); | ||
| 153 | + // Prevent the object from being placed on the | ||
| 154 | + // finalization queue | ||
| 155 | + System.GC.SuppressFinalize(this); | ||
| 156 | + } | ||
| 157 | + | ||
| 158 | + private void Cleanup() /* shared by Dispose() and the finalizer */ | ||
| 159 | + { | ||
| 160 | + DestroyOnlineStream(Handle); | ||
| 161 | + | ||
| 162 | + // Don't permit the handle to be used again. | ||
| 163 | + _handle = new HandleRef(this, IntPtr.Zero); | ||
| 164 | + } | ||
| 165 | + | ||
| 166 | + private HandleRef _handle; | ||
| 167 | + public IntPtr Handle => _handle.Handle; | ||
| 168 | + | ||
| 169 | + [DllImport(Dll.Filename, EntryPoint = "DestoryOnlineStream")] /* c-api.h declares the function under this misspelled name */ | ||
| 170 | + private static extern void DestroyOnlineStream(IntPtr handle); | ||
| 171 | + | ||
| 172 | + [DllImport(Dll.Filename)] | ||
| 173 | + private static extern void AcceptWaveform(IntPtr handle, int sampleRate, float[] samples, int n); | ||
| 174 | + | ||
| 175 | + [DllImport(Dll.Filename)] | ||
| 176 | + private static extern void InputFinished(IntPtr handle); | ||
| 177 | + } | ||
| 178 | + | ||
| 179 | + // please see | ||
| 180 | + // https://www.mono-project.com/docs/advanced/pinvoke/#gc-safe-pinvoke-code | ||
| 181 | + // https://www.mono-project.com/docs/advanced/pinvoke/#properly-disposing-of-resources | ||
| 182 | + public class OnlineRecognizer : IDisposable /* owns one native online (streaming) recognizer */ | ||
| 183 | + { | ||
| 184 | + public OnlineRecognizer(OnlineRecognizerConfig config) | ||
| 185 | + { | ||
| 186 | + IntPtr h = CreateOnlineRecognizer(ref config); | ||
| 187 | + _handle = new HandleRef(this, h); | ||
| 188 | + } | ||
| 189 | + | ||
| 190 | + public OnlineStream CreateStream() /* each call asks the native side for a new stream and wraps it */ | ||
| 191 | + { | ||
| 192 | + IntPtr p = CreateOnlineStream(_handle.Handle); | ||
| 193 | + return new OnlineStream(p); | ||
| 194 | + } | ||
| 195 | + | ||
| 196 | + /// Return true if the passed stream is ready for decoding. | ||
| 197 | + public bool IsReady(OnlineStream stream) | ||
| 198 | + { | ||
| 199 | + return IsReady(_handle.Handle, stream.Handle) != 0; | ||
| 200 | + } | ||
| 201 | + | ||
| 202 | + /// Return true if an endpoint is detected for this stream. | ||
| 203 | + /// You probably need to invoke Reset(stream) when this method returns | ||
| 204 | + /// true. | ||
| 205 | + public bool IsEndpoint(OnlineStream stream) | ||
| 206 | + { | ||
| 207 | + return IsEndpoint(_handle.Handle, stream.Handle) != 0; | ||
| 208 | + } | ||
| 209 | + | ||
| 210 | + /// You have to ensure that IsReady(stream) returns true before | ||
| 211 | + /// you call this method. | ||
| 212 | + public void Decode(OnlineStream stream) | ||
| 213 | + { | ||
| 214 | + Decode(_handle.Handle, stream.Handle); | ||
| 215 | + } | ||
| 216 | + | ||
| 217 | + /// The caller should ensure all passed streams are ready for decoding. | ||
| 218 | + public void Decode(IEnumerable<OnlineStream> streams) | ||
| 219 | + { | ||
| 220 | + IntPtr[] ptrs = streams.Select(s => s.Handle).ToArray(); | ||
| 221 | + Decode(_handle.Handle, ptrs, ptrs.Length); | ||
| 222 | + } | ||
| 223 | + | ||
| 224 | + public OnlineRecognizerResult GetResult(OnlineStream stream) | ||
| 225 | + { | ||
| 226 | + IntPtr h = GetResult(_handle.Handle, stream.Handle); | ||
| 227 | + OnlineRecognizerResult result = new OnlineRecognizerResult(h); | ||
| 228 | + DestroyResult(h); /* the constructor above already copied the text into a managed string */ | ||
| 229 | + return result; | ||
| 230 | + } | ||
| 231 | + | ||
| 232 | + /// When this method returns, IsEndpoint(stream) will return false. | ||
| 233 | + public void Reset(OnlineStream stream) | ||
| 234 | + { | ||
| 235 | + Reset(_handle.Handle, stream.Handle); | ||
| 236 | + } | ||
| 237 | + | ||
| 238 | + public void Dispose() | ||
| 239 | + { | ||
| 240 | + Cleanup(); | ||
| 241 | + // Prevent the object from being placed on the | ||
| 242 | + // finalization queue | ||
| 243 | + System.GC.SuppressFinalize(this); | ||
| 244 | + } | ||
| 245 | + | ||
| 246 | + ~OnlineRecognizer() | ||
| 247 | + { | ||
| 248 | + Cleanup(); | ||
| 249 | + } | ||
| 250 | + | ||
| 251 | + private void Cleanup() /* shared by Dispose() and the finalizer */ | ||
| 252 | + { | ||
| 253 | + DestroyOnlineRecognizer(_handle.Handle); | ||
| 254 | + | ||
| 255 | + // Don't permit the handle to be used again. | ||
| 256 | + _handle = new HandleRef(this, IntPtr.Zero); | ||
| 257 | + } | ||
| 258 | + | ||
| 259 | + private HandleRef _handle; | ||
| 260 | + | ||
| 261 | + [DllImport(Dll.Filename)] | ||
| 262 | + private static extern IntPtr CreateOnlineRecognizer(ref OnlineRecognizerConfig config); | ||
| 263 | + | ||
| 264 | + [DllImport(Dll.Filename)] | ||
| 265 | + private static extern void DestroyOnlineRecognizer(IntPtr handle); | ||
| 266 | + | ||
| 267 | + [DllImport(Dll.Filename)] | ||
| 268 | + private static extern IntPtr CreateOnlineStream(IntPtr handle); | ||
| 269 | + | ||
| 270 | + [DllImport(Dll.Filename, EntryPoint = "IsOnlineStreamReady")] | ||
| 271 | + private static extern int IsReady(IntPtr handle, IntPtr stream); | ||
| 272 | + | ||
| 273 | + [DllImport(Dll.Filename, EntryPoint = "DecodeOnlineStream")] | ||
| 274 | + private static extern void Decode(IntPtr handle, IntPtr stream); | ||
| 275 | + | ||
| 276 | + [DllImport(Dll.Filename, EntryPoint = "DecodeMultipleOnlineStreams")] | ||
| 277 | + private static extern void Decode(IntPtr handle, IntPtr[] streams, int n); | ||
| 278 | + | ||
| 279 | + [DllImport(Dll.Filename, EntryPoint = "GetOnlineStreamResult")] | ||
| 280 | + private static extern IntPtr GetResult(IntPtr handle, IntPtr stream); | ||
| 281 | + | ||
| 282 | + [DllImport(Dll.Filename, EntryPoint = "DestroyOnlineRecognizerResult")] | ||
| 283 | + private static extern void DestroyResult(IntPtr result); | ||
| 284 | + | ||
| 285 | + [DllImport(Dll.Filename)] | ||
| 286 | + private static extern void Reset(IntPtr handle, IntPtr stream); | ||
| 287 | + | ||
| 288 | + [DllImport(Dll.Filename)] | ||
| 289 | + private static extern int IsEndpoint(IntPtr handle, IntPtr stream); | ||
| 290 | + } | ||
| 291 | +} |
scripts/dotnet/run.sh
0 → 100755
| 1 | +#!/usr/bin/env bash | ||
| 2 | +# Copyright (c) 2023 Xiaomi Corporation | ||
| 3 | + | ||
| 4 | +set -ex | ||
| 5 | + | ||
| 6 | +mkdir -p macos linux windows all | ||
| 7 | + | ||
| 8 | +cp ./online.cs all | ||
| 9 | +cp ./offline.cs all | ||
| 10 | + | ||
| 11 | +./generate.py | ||
| 12 | + | ||
| 13 | +pushd linux | ||
| 14 | +dotnet build -c Release | ||
| 15 | +dotnet pack -c Release -o ../packages | ||
| 16 | +popd | ||
| 17 | + | ||
| 18 | +pushd macos | ||
| 19 | +dotnet build -c Release | ||
| 20 | +dotnet pack -c Release -o ../packages | ||
| 21 | +popd | ||
| 22 | + | ||
| 23 | +pushd windows | ||
| 24 | +dotnet build -c Release | ||
| 25 | +dotnet pack -c Release -o ../packages | ||
| 26 | +popd | ||
| 27 | + | ||
| 28 | +pushd all | ||
| 29 | +dotnet build -c Release | ||
| 30 | +dotnet pack -c Release -o ../packages | ||
| 31 | +popd | ||
| 32 | + | ||
| 33 | +ls -lh packages |
scripts/dotnet/sherpa-onnx.csproj.in
0 → 100644
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | + <PropertyGroup> | ||
| 3 | + <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression> | ||
| 4 | + <PackageReadmeFile>README.md</PackageReadmeFile> | ||
| 5 | + <OutputType>Library</OutputType> | ||
| 6 | + <LangVersion>10.0</LangVersion> | ||
| 7 | + <TargetFrameworks>netstandard2.1;netcoreapp3.1;net6.0;net7.0</TargetFrameworks> | ||
| 8 | + <RuntimeIdentifiers>linux-x64;osx-x64;win-x64</RuntimeIdentifiers> | ||
| 9 | + <AllowUnsafeBlocks>true</AllowUnsafeBlocks> | ||
| 10 | + <AssemblyName>sherpa-onnx</AssemblyName> | ||
| 11 | + <Version>{{ version }}</Version> | ||
| 12 | + | ||
| 13 | + <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl> | ||
| 14 | + <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl> | ||
| 15 | + <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline | ||
| 16 | + privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags> | ||
| 17 | + | ||
| 18 | + <Authors>The Next-gen Kaldi development team</Authors> | ||
| 19 | + <Owners>The Next-gen Kaldi development team</Owners> | ||
| 20 | + <Company>Xiaomi Corporation</Company> | ||
| 21 | + <Copyright>Copyright 2019-2023 Xiaomi Corporation</Copyright> | ||
| 22 | + <Description>sherpa-onnx is an open-source real-time speech recognition toolkit developed | ||
| 23 | + by the Next-gen Kaldi team. It supports streaming recognition on a variety of | ||
| 24 | + platforms such as Android, iOS, Raspberry, Linux, Windows, macOS, etc. | ||
| 25 | + | ||
| 26 | + It does not require Internet connection during recognition. | ||
| 27 | + | ||
| 28 | + See the documentation https://k2-fsa.github.io/sherpa/onnx/index.html | ||
| 29 | + for details. | ||
| 30 | + </Description> | ||
| 31 | + | ||
| 32 | + <!-- Pack Option --> | ||
| 33 | + <Title>sherpa-onnx v{{ version }}</Title> | ||
| 34 | + <PackageId>org.k2fsa.sherpa.onnx</PackageId> | ||
| 35 | + | ||
| 36 | + <!-- Signing --> | ||
| 37 | + <SignAssembly>false</SignAssembly> | ||
| 38 | + <PublicSign>false</PublicSign> | ||
| 39 | + <DelaySign>false</DelaySign> | ||
| 40 | + </PropertyGroup> | ||
| 41 | + | ||
| 42 | + <PropertyGroup> | ||
| 43 | + <RestoreSources>{{ packages_dir }};$(RestoreSources);https://api.nuget.org/v3/index.json</RestoreSources> | ||
| 44 | + </PropertyGroup> | ||
| 45 | + | ||
| 46 | + <ItemGroup> | ||
| 47 | + <None Include="../README.md" Pack="true" PackagePath="/"/> | ||
| 48 | + </ItemGroup> | ||
| 49 | + | ||
| 50 | + <ItemGroup> | ||
| 51 | + <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.linux-x64" Version="{{ version }}" /> | ||
| 52 | + <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.osx-x64" Version="{{ version }}" /> | ||
| 53 | + <PackageReference Include="org.k2fsa.sherpa.onnx.runtime.win-x64" Version="{{ version }}" /> | ||
| 54 | + </ItemGroup> | ||
| 55 | + | ||
| 56 | +</Project> |
scripts/dotnet/sherpa-onnx.csproj.runtime.in
0 → 100644
| 1 | +<Project Sdk="Microsoft.NET.Sdk"> | ||
| 2 | + <PropertyGroup> | ||
| 3 | + <PackageLicenseExpression>Apache-2.0</PackageLicenseExpression> | ||
| 4 | + <PackageReadmeFile>README.md</PackageReadmeFile> | ||
| 5 | + <OutputType>Library</OutputType> | ||
| 6 | + <TargetFrameworks>netstandard2.0;netcoreapp3.1;net6.0</TargetFrameworks> | ||
| 7 | + <RuntimeIdentifier>{{ dotnet_rid }}</RuntimeIdentifier> | ||
| 8 | + <AssemblyName>sherpa-onnx</AssemblyName> | ||
| 9 | + <Version>{{ version }}</Version> | ||
| 10 | + | ||
| 11 | + <PackageProjectUrl>https://github.com/k2-fsa/sherpa-onnx</PackageProjectUrl> | ||
| 12 | + <RepositoryUrl>https://github.com/k2-fsa/sherpa-onnx</RepositoryUrl> | ||
| 13 | + <PackageTags>speech recognition voice audio stt asr speech-to-text AI offline | ||
| 14 | + privacy open-sourced next-gen-kaldi k2 kaldi2 sherpa-onnx</PackageTags> | ||
| 15 | + | ||
| 16 | + <!-- Nuget Properties --> | ||
| 17 | + <Description>.NET native {{ dotnet_rid }} wrapper for the sherpa-onnx project. | ||
| 18 | + | ||
| 19 | + In general, you don't need to use this package directly. | ||
| 20 | + | ||
| 21 | + Please use https://www.nuget.org/packages/org.k2fsa.sherpa.onnx instead | ||
| 22 | + </Description> | ||
| 23 | + <IncludeBuildOutput>false</IncludeBuildOutput> | ||
| 24 | + | ||
| 25 | + <!-- Pack Option --> | ||
| 26 | + <Title>sherpa-onnx {{ dotnet_rid }} v{{ version }}</Title> | ||
| 27 | + <PackageId>org.k2fsa.sherpa.onnx.runtime.{{ dotnet_rid }}</PackageId> | ||
| 28 | + | ||
| 29 | + <!-- Signing --> | ||
| 30 | + <SignAssembly>false</SignAssembly> | ||
| 31 | + <PublicSign>false</PublicSign> | ||
| 32 | + <DelaySign>false</DelaySign> | ||
| 33 | + </PropertyGroup> | ||
| 34 | + | ||
| 35 | + <ItemGroup> | ||
| 36 | + <None Include="../README.md" Pack="true" PackagePath="/"/> | ||
| 37 | + </ItemGroup> | ||
| 38 | + | ||
| 39 | + <ItemGroup> | ||
| 40 | + <!-- Native library must be in native directory... --> | ||
| 41 | + <!-- If project is built as a STATIC_LIBRARY (e.g. Windows) then we don't have to include it --> | ||
| 42 | + <Content Include=" | ||
| 43 | + {{ libs }} | ||
| 44 | + "> | ||
| 45 | + <PackagePath>runtimes/{{ dotnet_rid }}/native/%(Filename)%(Extension)</PackagePath> | ||
| 46 | + <Pack>true</Pack> | ||
| 47 | + <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | ||
| 48 | + </Content> | ||
| 49 | + </ItemGroup> | ||
| 50 | +</Project> |
| @@ -2,6 +2,11 @@ include_directories(${CMAKE_SOURCE_DIR}) | @@ -2,6 +2,11 @@ include_directories(${CMAKE_SOURCE_DIR}) | ||
| 2 | add_library(sherpa-onnx-c-api c-api.cc) | 2 | add_library(sherpa-onnx-c-api c-api.cc) |
| 3 | target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core) | 3 | target_link_libraries(sherpa-onnx-c-api sherpa-onnx-core) |
| 4 | 4 | ||
| 5 | +if(BUILD_SHARED_LIBS) | ||
| 6 | + target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_SHARED_LIBS=1) | ||
| 7 | + target_compile_definitions(sherpa-onnx-c-api PRIVATE SHERPA_ONNX_BUILD_MAIN_LIB=1) | ||
| 8 | +endif() | ||
| 9 | + | ||
| 5 | install(TARGETS sherpa-onnx-c-api DESTINATION lib) | 10 | install(TARGETS sherpa-onnx-c-api DESTINATION lib) |
| 6 | 11 | ||
| 7 | install(FILES c-api.h | 12 | install(FILES c-api.h |
| @@ -10,10 +10,11 @@ | @@ -10,10 +10,11 @@ | ||
| 10 | #include <vector> | 10 | #include <vector> |
| 11 | 11 | ||
| 12 | #include "sherpa-onnx/csrc/display.h" | 12 | #include "sherpa-onnx/csrc/display.h" |
| 13 | +#include "sherpa-onnx/csrc/offline-recognizer.h" | ||
| 13 | #include "sherpa-onnx/csrc/online-recognizer.h" | 14 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| 14 | 15 | ||
| 15 | struct SherpaOnnxOnlineRecognizer { | 16 | struct SherpaOnnxOnlineRecognizer { |
| 16 | - sherpa_onnx::OnlineRecognizer *impl; | 17 | + std::unique_ptr<sherpa_onnx::OnlineRecognizer> impl; |
| 17 | }; | 18 | }; |
| 18 | 19 | ||
| 19 | struct SherpaOnnxOnlineStream { | 20 | struct SherpaOnnxOnlineStream { |
| @@ -56,14 +57,19 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | @@ -56,14 +57,19 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | ||
| 56 | recognizer_config.endpoint_config.rule3.min_utterance_length = | 57 | recognizer_config.endpoint_config.rule3.min_utterance_length = |
| 57 | config->rule3_min_utterance_length; | 58 | config->rule3_min_utterance_length; |
| 58 | 59 | ||
| 60 | + if (config->model_config.debug) { | ||
| 61 | + fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); | ||
| 62 | + } | ||
| 63 | + | ||
| 59 | SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; | 64 | SherpaOnnxOnlineRecognizer *recognizer = new SherpaOnnxOnlineRecognizer; |
| 60 | - recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config); | 65 | + |
| 66 | + recognizer->impl = | ||
| 67 | + std::make_unique<sherpa_onnx::OnlineRecognizer>(recognizer_config); | ||
| 61 | 68 | ||
| 62 | return recognizer; | 69 | return recognizer; |
| 63 | } | 70 | } |
| 64 | 71 | ||
| 65 | void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer) { | 72 | void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer) { |
| 66 | - delete recognizer->impl; | ||
| 67 | delete recognizer; | 73 | delete recognizer; |
| 68 | } | 74 | } |
| 69 | 75 | ||
| @@ -144,3 +150,116 @@ void DestroyDisplay(SherpaOnnxDisplay *display) { delete display; } | @@ -144,3 +150,116 @@ void DestroyDisplay(SherpaOnnxDisplay *display) { delete display; } | ||
| 144 | void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s) { | 150 | void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s) { |
| 145 | display->impl->Print(idx, s); | 151 | display->impl->Print(idx, s); |
| 146 | } | 152 | } |
| 153 | + | ||
| 154 | +// ============================================================ | ||
| 155 | +// For offline ASR (i.e., non-streaming ASR) | ||
| 156 | +// ============================================================ | ||
| 157 | +// | ||
| 158 | +struct SherpaOnnxOfflineRecognizer { | ||
| 159 | + std::unique_ptr<sherpa_onnx::OfflineRecognizer> impl; | ||
| 160 | +}; | ||
| 161 | + | ||
| 162 | +struct SherpaOnnxOfflineStream { | ||
| 163 | + std::unique_ptr<sherpa_onnx::OfflineStream> impl; | ||
| 164 | + explicit SherpaOnnxOfflineStream( | ||
| 165 | + std::unique_ptr<sherpa_onnx::OfflineStream> p) | ||
| 166 | + : impl(std::move(p)) {} | ||
| 167 | +}; | ||
| 168 | + | ||
| 169 | +SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | ||
| 170 | + const SherpaOnnxOfflineRecognizerConfig *config) { | ||
| 171 | + sherpa_onnx::OfflineRecognizerConfig recognizer_config; | ||
| 172 | + | ||
| 173 | + recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; | ||
| 174 | + | ||
| 175 | + recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; | ||
| 176 | + | ||
| 177 | + recognizer_config.model_config.transducer.encoder_filename = | ||
| 178 | + config->model_config.transducer.encoder; | ||
| 179 | + | ||
| 180 | + recognizer_config.model_config.transducer.decoder_filename = | ||
| 181 | + config->model_config.transducer.decoder; | ||
| 182 | + | ||
| 183 | + recognizer_config.model_config.transducer.joiner_filename = | ||
| 184 | + config->model_config.transducer.joiner; | ||
| 185 | + | ||
| 186 | + recognizer_config.model_config.paraformer.model = | ||
| 187 | + config->model_config.paraformer.model; | ||
| 188 | + | ||
| 189 | + recognizer_config.model_config.nemo_ctc.model = | ||
| 190 | + config->model_config.nemo_ctc.model; | ||
| 191 | + | ||
| 192 | + recognizer_config.model_config.tokens = config->model_config.tokens; | ||
| 193 | + recognizer_config.model_config.num_threads = config->model_config.num_threads; | ||
| 194 | + recognizer_config.model_config.debug = config->model_config.debug; | ||
| 195 | + | ||
| 196 | + recognizer_config.lm_config.model = config->lm_config.model; | ||
| 197 | + recognizer_config.lm_config.scale = config->lm_config.scale; | ||
| 198 | + | ||
| 199 | + recognizer_config.decoding_method = config->decoding_method; | ||
| 200 | + recognizer_config.max_active_paths = config->max_active_paths; | ||
| 201 | + | ||
| 202 | + if (config->model_config.debug) { | ||
| 203 | + fprintf(stderr, "%s\n", recognizer_config.ToString().c_str()); | ||
| 204 | + } | ||
| 205 | + | ||
| 206 | + SherpaOnnxOfflineRecognizer *recognizer = new SherpaOnnxOfflineRecognizer; | ||
| 207 | + | ||
| 208 | + recognizer->impl = | ||
| 209 | + std::make_unique<sherpa_onnx::OfflineRecognizer>(recognizer_config); | ||
| 210 | + | ||
| 211 | + return recognizer; | ||
| 212 | +} | ||
| 213 | + | ||
| 214 | +void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer *recognizer) { | ||
| 215 | + delete recognizer; | ||
| 216 | +} | ||
| 217 | + | ||
| 218 | +SherpaOnnxOfflineStream *CreateOfflineStream( | ||
| 219 | + const SherpaOnnxOfflineRecognizer *recognizer) { | ||
| 220 | + SherpaOnnxOfflineStream *stream = | ||
| 221 | + new SherpaOnnxOfflineStream(recognizer->impl->CreateStream()); | ||
| 222 | + return stream; | ||
| 223 | +} | ||
| 224 | + | ||
| 225 | +void DestoryOfflineStream(SherpaOnnxOfflineStream *stream) { delete stream; } | ||
| 226 | + | ||
| 227 | +void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, int32_t sample_rate, | ||
| 228 | + const float *samples, int32_t n) { | ||
| 229 | + stream->impl->AcceptWaveform(sample_rate, samples, n); | ||
| 230 | +} | ||
| 231 | + | ||
| 232 | +void DecodeOfflineStream(SherpaOnnxOfflineRecognizer *recognizer, | ||
| 233 | + SherpaOnnxOfflineStream *stream) { | ||
| 234 | + recognizer->impl->DecodeStream(stream->impl.get()); | ||
| 235 | +} | ||
| 236 | + | ||
| 237 | +void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer *recognizer, | ||
| 238 | + SherpaOnnxOfflineStream **streams, | ||
| 239 | + int32_t n) { | ||
| 240 | + std::vector<sherpa_onnx::OfflineStream *> ss(n); | ||
| 241 | + for (int32_t i = 0; i != n; ++i) { | ||
| 242 | + ss[i] = streams[i]->impl.get(); | ||
| 243 | + } | ||
| 244 | + recognizer->impl->DecodeStreams(ss.data(), n); | ||
| 245 | +} | ||
| 246 | + | ||
| 247 | +SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( | ||
| 248 | + SherpaOnnxOfflineStream *stream) { | ||
| 249 | + const sherpa_onnx::OfflineRecognitionResult &result = | ||
| 250 | + stream->impl->GetResult(); | ||
| 251 | + const auto &text = result.text; | ||
| 252 | + | ||
| 253 | + auto r = new SherpaOnnxOfflineRecognizerResult; | ||
| 254 | + r->text = new char[text.size() + 1]; | ||
| 255 | + std::copy(text.begin(), text.end(), const_cast<char *>(r->text)); | ||
| 256 | + const_cast<char *>(r->text)[text.size()] = 0; | ||
| 257 | + | ||
| 258 | + return r; | ||
| 259 | +} | ||
| 260 | + | ||
| 261 | +void DestroyOfflineRecognizerResult( | ||
| 262 | + const SherpaOnnxOfflineRecognizerResult *r) { | ||
| 263 | + delete[] r->text; | ||
| 264 | + delete r; | ||
| 265 | +} |
| @@ -18,12 +18,35 @@ | @@ -18,12 +18,35 @@ | ||
| 18 | extern "C" { | 18 | extern "C" { |
| 19 | #endif | 19 | #endif |
| 20 | 20 | ||
| 21 | +// See https://github.com/pytorch/pytorch/blob/main/c10/macros/Export.h | ||
| 22 | +// We will set SHERPA_ONNX_BUILD_SHARED_LIBS and SHERPA_ONNX_BUILD_MAIN_LIB in | ||
| 23 | +// CMakeLists.txt | ||
| 24 | + | ||
| 25 | +#if defined(_WIN32) | ||
| 26 | +#if defined(SHERPA_ONNX_BUILD_SHARED_LIBS) | ||
| 27 | +#define SHERPA_ONNX_EXPORT __declspec(dllexport) | ||
| 28 | +#define SHERPA_ONNX_IMPORT __declspec(dllimport) | ||
| 29 | +#else | ||
| 30 | +#define SHERPA_ONNX_EXPORT | ||
| 31 | +#define SHERPA_ONNX_IMPORT | ||
| 32 | +#endif | ||
| 33 | +#else // WIN32 | ||
| 34 | +#define SHERPA_ONNX_EXPORT __attribute__((__visibility__("default"))) | ||
| 35 | +#define SHERPA_ONNX_IMPORT SHERPA_ONNX_EXPORT | ||
| 36 | +#endif | ||
| 37 | + | ||
| 38 | +#if defined(SHERPA_ONNX_BUILD_MAIN_LIB) | ||
| 39 | +#define SHERPA_ONNX_API SHERPA_ONNX_EXPORT | ||
| 40 | +#else | ||
| 41 | +#define SHERPA_ONNX_API SHERPA_ONNX_IMPORT | ||
| 42 | +#endif | ||
| 43 | + | ||
| 21 | /// Please refer to | 44 | /// Please refer to |
| 22 | /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | 45 | /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html |
| 23 | /// to download pre-trained models. That is, you can find encoder-xxx.onnx | 46 | /// to download pre-trained models. That is, you can find encoder-xxx.onnx |
| 24 | /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | 47 | /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct |
| 25 | /// from there. | 48 | /// from there. |
| 26 | -typedef struct SherpaOnnxOnlineTransducerModelConfig { | 49 | +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig { |
| 27 | const char *encoder; | 50 | const char *encoder; |
| 28 | const char *decoder; | 51 | const char *decoder; |
| 29 | const char *joiner; | 52 | const char *joiner; |
| @@ -33,7 +56,7 @@ typedef struct SherpaOnnxOnlineTransducerModelConfig { | @@ -33,7 +56,7 @@ typedef struct SherpaOnnxOnlineTransducerModelConfig { | ||
| 33 | } SherpaOnnxOnlineTransducerModelConfig; | 56 | } SherpaOnnxOnlineTransducerModelConfig; |
| 34 | 57 | ||
| 35 | /// It expects 16 kHz 16-bit single channel wave format. | 58 | /// It expects 16 kHz 16-bit single channel wave format. |
| 36 | -typedef struct SherpaOnnxFeatureConfig { | 59 | +SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { |
| 37 | /// Sample rate of the input data. MUST match the one expected | 60 | /// Sample rate of the input data. MUST match the one expected |
| 38 | /// by the model. For instance, it should be 16000 for models provided | 61 | /// by the model. For instance, it should be 16000 for models provided |
| 39 | /// by us. | 62 | /// by us. |
| @@ -44,7 +67,7 @@ typedef struct SherpaOnnxFeatureConfig { | @@ -44,7 +67,7 @@ typedef struct SherpaOnnxFeatureConfig { | ||
| 44 | int32_t feature_dim; | 67 | int32_t feature_dim; |
| 45 | } SherpaOnnxFeatureConfig; | 68 | } SherpaOnnxFeatureConfig; |
| 46 | 69 | ||
| 47 | -typedef struct SherpaOnnxOnlineRecognizerConfig { | 70 | +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { |
| 48 | SherpaOnnxFeatureConfig feat_config; | 71 | SherpaOnnxFeatureConfig feat_config; |
| 49 | SherpaOnnxOnlineTransducerModelConfig model_config; | 72 | SherpaOnnxOnlineTransducerModelConfig model_config; |
| 50 | 73 | ||
| @@ -75,7 +98,7 @@ typedef struct SherpaOnnxOnlineRecognizerConfig { | @@ -75,7 +98,7 @@ typedef struct SherpaOnnxOnlineRecognizerConfig { | ||
| 75 | float rule3_min_utterance_length; | 98 | float rule3_min_utterance_length; |
| 76 | } SherpaOnnxOnlineRecognizerConfig; | 99 | } SherpaOnnxOnlineRecognizerConfig; |
| 77 | 100 | ||
| 78 | -typedef struct SherpaOnnxOnlineRecognizerResult { | 101 | +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerResult { |
| 79 | const char *text; | 102 | const char *text; |
| 80 | // TODO(fangjun): Add more fields | 103 | // TODO(fangjun): Add more fields |
| 81 | } SherpaOnnxOnlineRecognizerResult; | 104 | } SherpaOnnxOnlineRecognizerResult; |
| @@ -83,32 +106,34 @@ typedef struct SherpaOnnxOnlineRecognizerResult { | @@ -83,32 +106,34 @@ typedef struct SherpaOnnxOnlineRecognizerResult { | ||
| 83 | /// Note: OnlineRecognizer here means StreamingRecognizer. | 106 | /// Note: OnlineRecognizer here means StreamingRecognizer. |
| 84 | /// It does not need to access the Internet during recognition. | 107 | /// It does not need to access the Internet during recognition. |
| 85 | /// Everything is run locally. | 108 | /// Everything is run locally. |
| 86 | -typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer; | ||
| 87 | -typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; | 109 | +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizer |
| 110 | + SherpaOnnxOnlineRecognizer; | ||
| 111 | +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; | ||
| 88 | 112 | ||
| 89 | -/// @param config Config for the recongizer. | 113 | +/// @param config Config for the recognizer. |
| 90 | /// @return Return a pointer to the recognizer. The user has to invoke | 114 | /// @return Return a pointer to the recognizer. The user has to invoke |
| 91 | // DestroyOnlineRecognizer() to free it to avoid memory leak. | 115 | // DestroyOnlineRecognizer() to free it to avoid memory leak. |
| 92 | -SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( | 116 | +SHERPA_ONNX_API SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( |
| 93 | const SherpaOnnxOnlineRecognizerConfig *config); | 117 | const SherpaOnnxOnlineRecognizerConfig *config); |
| 94 | 118 | ||
| 95 | /// Free a pointer returned by CreateOnlineRecognizer() | 119 | /// Free a pointer returned by CreateOnlineRecognizer() |
| 96 | /// | 120 | /// |
| 97 | /// @param p A pointer returned by CreateOnlineRecognizer() | 121 | /// @param p A pointer returned by CreateOnlineRecognizer() |
| 98 | -void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer *recognizer); | 122 | +SHERPA_ONNX_API void DestroyOnlineRecognizer( |
| 123 | + SherpaOnnxOnlineRecognizer *recognizer); | ||
| 99 | 124 | ||
| 100 | /// Create an online stream for accepting wave samples. | 125 | /// Create an online stream for accepting wave samples. |
| 101 | /// | 126 | /// |
| 102 | /// @param recognizer A pointer returned by CreateOnlineRecognizer() | 127 | /// @param recognizer A pointer returned by CreateOnlineRecognizer() |
| 103 | /// @return Return a pointer to an OnlineStream. The user has to invoke | 128 | /// @return Return a pointer to an OnlineStream. The user has to invoke |
| 104 | /// DestoryOnlineStream() to free it to avoid memory leak. | 129 | /// DestoryOnlineStream() to free it to avoid memory leak. |
| 105 | -SherpaOnnxOnlineStream *CreateOnlineStream( | 130 | +SHERPA_ONNX_API SherpaOnnxOnlineStream *CreateOnlineStream( |
| 106 | const SherpaOnnxOnlineRecognizer *recognizer); | 131 | const SherpaOnnxOnlineRecognizer *recognizer); |
| 107 | 132 | ||
| 108 | -/// Destory an online stream. | 133 | +/// Destroy an online stream. |
| 109 | /// | 134 | /// |
| 110 | /// @param stream A pointer returned by CreateOnlineStream() | 135 | /// @param stream A pointer returned by CreateOnlineStream() |
| 111 | -void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); | 136 | +SHERPA_ONNX_API void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); |
| 112 | 137 | ||
| 113 | /// Accept input audio samples and compute the features. | 138 | /// Accept input audio samples and compute the features. |
| 114 | /// The user has to invoke DecodeOnlineStream() to run the neural network and | 139 | /// The user has to invoke DecodeOnlineStream() to run the neural network and |
| @@ -121,16 +146,17 @@ void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); | @@ -121,16 +146,17 @@ void DestoryOnlineStream(SherpaOnnxOnlineStream *stream); | ||
| 121 | /// @param samples A pointer to a 1-D array containing audio samples. | 146 | /// @param samples A pointer to a 1-D array containing audio samples. |
| 122 | /// The range of samples has to be normalized to [-1, 1]. | 147 | /// The range of samples has to be normalized to [-1, 1]. |
| 123 | /// @param n Number of elements in the samples array. | 148 | /// @param n Number of elements in the samples array. |
| 124 | -void AcceptWaveform(SherpaOnnxOnlineStream *stream, int32_t sample_rate, | ||
| 125 | - const float *samples, int32_t n); | 149 | +SHERPA_ONNX_API void AcceptWaveform(SherpaOnnxOnlineStream *stream, |
| 150 | + int32_t sample_rate, const float *samples, | ||
| 151 | + int32_t n); | ||
| 126 | 152 | ||
| 127 | /// Return 1 if there are enough feature frames for decoding. | 153 | /// Return 1 if there are enough feature frames for decoding. |
| 128 | /// Return 0 otherwise. | 154 | /// Return 0 otherwise. |
| 129 | /// | 155 | /// |
| 130 | /// @param recognizer A pointer returned by CreateOnlineRecognizer | 156 | /// @param recognizer A pointer returned by CreateOnlineRecognizer |
| 131 | /// @param stream A pointer returned by CreateOnlineStream | 157 | /// @param stream A pointer returned by CreateOnlineStream |
| 132 | -int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 133 | - SherpaOnnxOnlineStream *stream); | 158 | +SHERPA_ONNX_API int32_t IsOnlineStreamReady( |
| 159 | + SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream); | ||
| 134 | 160 | ||
| 135 | /// Call this function to run the neural network model and decoding. | 161 | /// Call this function to run the neural network model and decoding. |
| 136 | // | 162 | // |
| @@ -142,8 +168,8 @@ int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer, | @@ -142,8 +168,8 @@ int32_t IsOnlineStreamReady(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 142 | /// DecodeOnlineStream(recognizer, stream); | 168 | /// DecodeOnlineStream(recognizer, stream); |
| 143 | /// } | 169 | /// } |
| 144 | /// | 170 | /// |
| 145 | -void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 146 | - SherpaOnnxOnlineStream *stream); | 171 | +SHERPA_ONNX_API void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, |
| 172 | + SherpaOnnxOnlineStream *stream); | ||
| 147 | 173 | ||
| 148 | /// This function is similar to DecodeOnlineStream(). It decodes multiple | 174 | /// This function is similar to DecodeOnlineStream(). It decodes multiple |
| 149 | /// OnlineStream in parallel. | 175 | /// OnlineStream in parallel. |
| @@ -155,8 +181,9 @@ void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, | @@ -155,8 +181,9 @@ void DecodeOnlineStream(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 155 | /// @param streams A pointer array containing pointers returned by | 181 | /// @param streams A pointer array containing pointers returned by |
| 156 | /// CreateOnlineStream() | 182 | /// CreateOnlineStream() |
| 157 | /// @param n Number of elements in the given streams array. | 183 | /// @param n Number of elements in the given streams array. |
| 158 | -void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 159 | - SherpaOnnxOnlineStream **streams, int32_t n); | 184 | +SHERPA_ONNX_API void DecodeMultipleOnlineStreams( |
| 185 | + SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream **streams, | ||
| 186 | + int32_t n); | ||
| 160 | 187 | ||
| 161 | /// Get the decoding results so far for an OnlineStream. | 188 | /// Get the decoding results so far for an OnlineStream. |
| 162 | /// | 189 | /// |
| @@ -165,47 +192,188 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer, | @@ -165,47 +192,188 @@ void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 165 | /// @return A pointer containing the result. The user has to invoke | 192 | /// @return A pointer containing the result. The user has to invoke |
| 166 | /// DestroyOnlineRecognizerResult() to free the returned pointer to | 193 | /// DestroyOnlineRecognizerResult() to free the returned pointer to |
| 167 | /// avoid memory leak. | 194 | /// avoid memory leak. |
| 168 | -SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( | 195 | +SHERPA_ONNX_API SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( |
| 169 | SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream); | 196 | SherpaOnnxOnlineRecognizer *recognizer, SherpaOnnxOnlineStream *stream); |
| 170 | 197 | ||
| 171 | /// Destroy the pointer returned by GetOnlineStreamResult(). | 198 | /// Destroy the pointer returned by GetOnlineStreamResult(). |
| 172 | /// | 199 | /// |
| 173 | /// @param r A pointer returned by GetOnlineStreamResult() | 200 | /// @param r A pointer returned by GetOnlineStreamResult() |
| 174 | -void DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult *r); | 201 | +SHERPA_ONNX_API void DestroyOnlineRecognizerResult( |
| 202 | + const SherpaOnnxOnlineRecognizerResult *r); | ||
| 175 | 203 | ||
| 176 | /// Reset an OnlineStream, which clears the neural network model state | 204 | /// Reset an OnlineStream, which clears the neural network model state |
| 177 | /// and the state for decoding. | 205 | /// and the state for decoding. |
| 178 | /// | 206 | /// |
| 179 | /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | 207 | /// @param recognizer A pointer returned by CreateOnlineRecognizer(). |
| 180 | /// @param stream A pointer returned by CreateOnlineStream | 208 | /// @param stream A pointer returned by CreateOnlineStream |
| 181 | -void Reset(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 182 | - SherpaOnnxOnlineStream *stream); | 209 | +SHERPA_ONNX_API void Reset(SherpaOnnxOnlineRecognizer *recognizer, |
| 210 | + SherpaOnnxOnlineStream *stream); | ||
| 183 | 211 | ||
| 184 | /// Signal that no more audio samples would be available. | 212 | /// Signal that no more audio samples would be available. |
| 185 | /// After this call, you cannot call AcceptWaveform() any more. | 213 | /// After this call, you cannot call AcceptWaveform() any more. |
| 186 | /// | 214 | /// |
| 187 | /// @param stream A pointer returned by CreateOnlineStream() | 215 | /// @param stream A pointer returned by CreateOnlineStream() |
| 188 | -void InputFinished(SherpaOnnxOnlineStream *stream); | 216 | +SHERPA_ONNX_API void InputFinished(SherpaOnnxOnlineStream *stream); |
| 189 | 217 | ||
| 190 | /// Return 1 if an endpoint has been detected. | 218 | /// Return 1 if an endpoint has been detected. |
| 191 | /// | 219 | /// |
| 192 | /// @param recognizer A pointer returned by CreateOnlineRecognizer() | 220 | /// @param recognizer A pointer returned by CreateOnlineRecognizer() |
| 193 | /// @param stream A pointer returned by CreateOnlineStream() | 221 | /// @param stream A pointer returned by CreateOnlineStream() |
| 194 | /// @return Return 1 if an endpoint is detected. Return 0 otherwise. | 222 | /// @return Return 1 if an endpoint is detected. Return 0 otherwise. |
| 195 | -int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer, | ||
| 196 | - SherpaOnnxOnlineStream *stream); | 223 | +SHERPA_ONNX_API int32_t IsEndpoint(SherpaOnnxOnlineRecognizer *recognizer, |
| 224 | + SherpaOnnxOnlineStream *stream); | ||
| 197 | 225 | ||
| 198 | // for displaying results on Linux/macOS. | 226 | // for displaying results on Linux/macOS. |
| 199 | -typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; | 227 | +SHERPA_ONNX_API typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; |
| 200 | 228 | ||
| 201 | /// Create a display object. Must be freed using DestroyDisplay to avoid | 229 | /// Create a display object. Must be freed using DestroyDisplay to avoid |
| 202 | /// memory leak. | 230 | /// memory leak. |
| 203 | -SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line); | 231 | +SHERPA_ONNX_API SherpaOnnxDisplay *CreateDisplay(int32_t max_word_per_line); |
| 204 | 232 | ||
| 205 | -void DestroyDisplay(SherpaOnnxDisplay *display); | 233 | +SHERPA_ONNX_API void DestroyDisplay(SherpaOnnxDisplay *display); |
| 206 | 234 | ||
| 207 | /// Print the result. | 235 | /// Print the result. |
| 208 | -void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, const char *s); | 236 | +SHERPA_ONNX_API void SherpaOnnxPrint(SherpaOnnxDisplay *display, int32_t idx, |
| 237 | + const char *s); | ||
| 238 | +// ============================================================ | ||
| 239 | +// For offline ASR (i.e., non-streaming ASR) | ||
| 240 | +// ============================================================ | ||
| 241 | + | ||
| 242 | +/// Please refer to | ||
| 243 | +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 244 | +/// to download pre-trained models. That is, you can find encoder-xxx.onnx, | ||
| 245 | +/// decoder-xxx.onnx, and joiner-xxx.onnx for this struct | ||
| 246 | +/// from there. | ||
| 247 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTransducerModelConfig { | ||
| 248 | + const char *encoder; | ||
| 249 | + const char *decoder; | ||
| 250 | + const char *joiner; | ||
| 251 | +} SherpaOnnxOfflineTransducerModelConfig; | ||
| 252 | + | ||
| 253 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineParaformerModelConfig { | ||
| 254 | + const char *model; | ||
| 255 | +} SherpaOnnxOfflineParaformerModelConfig; | ||
| 256 | + | ||
| 257 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineNemoEncDecCtcModelConfig { | ||
| 258 | + const char *model; | ||
| 259 | +} SherpaOnnxOfflineNemoEncDecCtcModelConfig; | ||
| 260 | + | ||
| 261 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineLMConfig { | ||
| 262 | + const char *model; | ||
| 263 | + float scale; | ||
| 264 | +} SherpaOnnxOfflineLMConfig; | ||
| 265 | + | ||
| 266 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineModelConfig { | ||
| 267 | + SherpaOnnxOfflineTransducerModelConfig transducer; | ||
| 268 | + SherpaOnnxOfflineParaformerModelConfig paraformer; | ||
| 269 | + SherpaOnnxOfflineNemoEncDecCtcModelConfig nemo_ctc; | ||
| 270 | + | ||
| 271 | + const char *tokens; | ||
| 272 | + int32_t num_threads; | ||
| 273 | + int32_t debug; | ||
| 274 | +} SherpaOnnxOfflineModelConfig; | ||
| 275 | + | ||
| 276 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerConfig { | ||
| 277 | + SherpaOnnxFeatureConfig feat_config; | ||
| 278 | + SherpaOnnxOfflineModelConfig model_config; | ||
| 279 | + SherpaOnnxOfflineLMConfig lm_config; | ||
| 280 | + | ||
| 281 | + const char *decoding_method; | ||
| 282 | + int32_t max_active_paths; | ||
| 283 | +} SherpaOnnxOfflineRecognizerConfig; | ||
| 284 | + | ||
| 285 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizer | ||
| 286 | + SherpaOnnxOfflineRecognizer; | ||
| 287 | + | ||
| 288 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; | ||
| 289 | + | ||
| 290 | +/// @param config Config for the recognizer. | ||
| 291 | +/// @return Return a pointer to the recognizer. The user has to invoke | ||
| 292 | +/// DestroyOfflineRecognizer() to free it to avoid memory leak. | ||
| 293 | +SHERPA_ONNX_API SherpaOnnxOfflineRecognizer *CreateOfflineRecognizer( | ||
| 294 | + const SherpaOnnxOfflineRecognizerConfig *config); | ||
| 295 | + | ||
| 296 | +/// Free a pointer returned by CreateOfflineRecognizer() | ||
| 297 | +/// | ||
| 298 | +/// @param recognizer A pointer returned by CreateOfflineRecognizer() | ||
| 299 | +SHERPA_ONNX_API void DestroyOfflineRecognizer( | ||
| 300 | + SherpaOnnxOfflineRecognizer *recognizer); | ||
| 301 | + | ||
| 302 | +/// Create an offline stream for accepting wave samples. | ||
| 303 | +/// | ||
| 304 | +/// @param recognizer A pointer returned by CreateOfflineRecognizer() | ||
| 305 | +/// @return Return a pointer to an OfflineStream. The user has to invoke | ||
| 306 | +/// DestoryOfflineStream() to free it to avoid memory leak. | ||
| 307 | +SHERPA_ONNX_API SherpaOnnxOfflineStream *CreateOfflineStream( | ||
| 308 | + const SherpaOnnxOfflineRecognizer *recognizer); | ||
| 309 | + | ||
| 310 | +/// Destroy an offline stream. | ||
| 311 | +/// | ||
| 312 | +/// @param stream A pointer returned by CreateOfflineStream() | ||
| 313 | +SHERPA_ONNX_API void DestoryOfflineStream(SherpaOnnxOfflineStream *stream); | ||
| 314 | + | ||
| 315 | +/// Accept input audio samples and compute the features. | ||
| 316 | +/// The user has to invoke DecodeOfflineStream() to run the neural network and | ||
| 317 | +/// decoding. | ||
| 318 | +/// | ||
| 319 | +/// @param stream A pointer returned by CreateOfflineStream(). | ||
| 320 | +/// @param sample_rate Sample rate of the input samples. If it is different | ||
| 321 | +/// from config.feat_config.sample_rate, we will do | ||
| 322 | +/// resampling inside sherpa-onnx. | ||
| 323 | +/// @param samples A pointer to a 1-D array containing audio samples. | ||
| 324 | +/// The range of samples has to be normalized to [-1, 1]. | ||
| 325 | +/// @param n Number of elements in the samples array. | ||
| 326 | +/// | ||
| 327 | +/// @caution: For each offline stream, please invoke this function only once! | ||
| 328 | +SHERPA_ONNX_API void AcceptWaveformOffline(SherpaOnnxOfflineStream *stream, | ||
| 329 | + int32_t sample_rate, | ||
| 330 | + const float *samples, int32_t n); | ||
| 331 | +/// Decode an offline stream. | ||
| 332 | +/// | ||
| 333 | +/// We assume you have invoked AcceptWaveformOffline() for the given stream | ||
| 334 | +/// before calling this function. | ||
| 335 | +/// | ||
| 336 | +/// @param recognizer A pointer returned by CreateOfflineRecognizer(). | ||
| 337 | +/// @param stream A pointer returned by CreateOfflineStream() | ||
| 338 | +SHERPA_ONNX_API void DecodeOfflineStream( | ||
| 339 | + SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream *stream); | ||
| 340 | + | ||
| 341 | +/// Decode a list of offline streams in parallel. | ||
| 342 | +/// | ||
| 343 | +/// We assume you have invoked AcceptWaveformOffline() for each stream | ||
| 344 | +/// before calling this function. | ||
| 345 | +/// | ||
| 346 | +/// @param recognizer A pointer returned by CreateOfflineRecognizer(). | ||
| 347 | +/// @param streams A pointer array containing pointers returned | ||
| 348 | +/// by CreateOfflineStream(). | ||
| 349 | +/// @param n Number of entries in the given streams. | ||
| 350 | +SHERPA_ONNX_API void DecodeMultipleOfflineStreams( | ||
| 351 | + SherpaOnnxOfflineRecognizer *recognizer, SherpaOnnxOfflineStream **streams, | ||
| 352 | + int32_t n); | ||
| 353 | + | ||
| 354 | +SHERPA_ONNX_API typedef struct SherpaOnnxOfflineRecognizerResult { | ||
| 355 | + const char *text; | ||
| 356 | + // TODO(fangjun): Add more fields | ||
| 357 | +} SherpaOnnxOfflineRecognizerResult; | ||
| 358 | + | ||
| 359 | +/// Get the result of the offline stream. | ||
| 360 | +/// | ||
| 361 | +/// We assume you have called DecodeOfflineStream() or | ||
| 362 | +/// DecodeMultipleOfflineStreams() with the given stream before calling | ||
| 363 | +/// this function. | ||
| 364 | +/// | ||
| 365 | +/// @param stream A pointer returned by CreateOfflineStream(). | ||
| 366 | +/// @return Return a pointer to the result. The user has to invoke | ||
| 367 | +/// DestroyOfflineRecognizerResult() to free the returned pointer to | ||
| 368 | +/// avoid memory leak. | ||
| 369 | +SHERPA_ONNX_API SherpaOnnxOfflineRecognizerResult *GetOfflineStreamResult( | ||
| 370 | + SherpaOnnxOfflineStream *stream); | ||
| 371 | + | ||
| 372 | +/// Destroy the pointer returned by GetOfflineStreamResult(). | ||
| 373 | +/// | ||
| 374 | +/// @param r A pointer returned by GetOfflineStreamResult() | ||
| 375 | +SHERPA_ONNX_API void DestroyOfflineRecognizerResult( | ||
| 376 | + const SherpaOnnxOfflineRecognizerResult *r); | ||
| 209 | 377 | ||
| 210 | #ifdef __cplusplus | 378 | #ifdef __cplusplus |
| 211 | } /* extern "C" */ | 379 | } /* extern "C" */ |
sherpa-onnx/csharp-api/SherpaOnnx.cs
已删除
100644 → 0
| 1 | -using System.Runtime.InteropServices; | ||
| 2 | -using System.Diagnostics; | ||
| 3 | - | ||
| 4 | -namespace SherpaOnnx | ||
| 5 | -{ | ||
| 6 | - /// <summary> | ||
| 7 | - /// online recognizer package | ||
| 8 | - /// Copyright (c) 2023 by manyeyes | ||
| 9 | - /// </summary> | ||
| 10 | - public class OnlineBase : IDisposable | ||
| 11 | - { | ||
| 12 | - public void Dispose() | ||
| 13 | - { | ||
| 14 | - Dispose(disposing: true); | ||
| 15 | - GC.SuppressFinalize(this); | ||
| 16 | - } | ||
| 17 | - protected virtual void Dispose(bool disposing) | ||
| 18 | - { | ||
| 19 | - if (!disposing) | ||
| 20 | - { | ||
| 21 | - if (_onlineRecognizerResult != IntPtr.Zero) | ||
| 22 | - { | ||
| 23 | - SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult); | ||
| 24 | - _onlineRecognizerResult = IntPtr.Zero; | ||
| 25 | - } | ||
| 26 | - if (_onlineStream.impl != IntPtr.Zero) | ||
| 27 | - { | ||
| 28 | - SherpaOnnxSharp.DestroyOnlineStream(_onlineStream); | ||
| 29 | - _onlineStream.impl = IntPtr.Zero; | ||
| 30 | - } | ||
| 31 | - if (_onlineRecognizer.impl != IntPtr.Zero) | ||
| 32 | - { | ||
| 33 | - SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer); | ||
| 34 | - _onlineRecognizer.impl = IntPtr.Zero; | ||
| 35 | - } | ||
| 36 | - this._disposed = true; | ||
| 37 | - } | ||
| 38 | - } | ||
| 39 | - ~OnlineBase() | ||
| 40 | - { | ||
| 41 | - Dispose(this._disposed); | ||
| 42 | - } | ||
| 43 | - internal SherpaOnnxOnlineStream _onlineStream; | ||
| 44 | - internal IntPtr _onlineRecognizerResult; | ||
| 45 | - internal SherpaOnnxOnlineRecognizer _onlineRecognizer; | ||
| 46 | - internal bool _disposed = false; | ||
| 47 | - } | ||
| 48 | - public class OnlineStream : OnlineBase | ||
| 49 | - { | ||
| 50 | - internal OnlineStream(SherpaOnnxOnlineStream onlineStream) | ||
| 51 | - { | ||
| 52 | - this._onlineStream = onlineStream; | ||
| 53 | - } | ||
| 54 | - protected override void Dispose(bool disposing) | ||
| 55 | - { | ||
| 56 | - if (!disposing) | ||
| 57 | - { | ||
| 58 | - SherpaOnnxSharp.DestroyOnlineStream(_onlineStream); | ||
| 59 | - _onlineStream.impl = IntPtr.Zero; | ||
| 60 | - this._disposed = true; | ||
| 61 | - base.Dispose(); | ||
| 62 | - } | ||
| 63 | - } | ||
| 64 | - } | ||
| 65 | - public class OnlineRecognizerResult : OnlineBase | ||
| 66 | - { | ||
| 67 | - internal OnlineRecognizerResult(IntPtr onlineRecognizerResult) | ||
| 68 | - { | ||
| 69 | - this._onlineRecognizerResult = onlineRecognizerResult; | ||
| 70 | - } | ||
| 71 | - protected override void Dispose(bool disposing) | ||
| 72 | - { | ||
| 73 | - if (!disposing) | ||
| 74 | - { | ||
| 75 | - SherpaOnnxSharp.DestroyOnlineRecognizerResult(_onlineRecognizerResult); | ||
| 76 | - _onlineRecognizerResult = IntPtr.Zero; | ||
| 77 | - this._disposed = true; | ||
| 78 | - base.Dispose(disposing); | ||
| 79 | - } | ||
| 80 | - } | ||
| 81 | - } | ||
| 82 | - public class OnlineRecognizer<T> : OnlineBase | ||
| 83 | - where T : class, new() | ||
| 84 | - { | ||
| 85 | - | ||
| 86 | - public OnlineRecognizer(T t, | ||
| 87 | - string tokensFilePath, string decoding_method = "greedy_search", | ||
| 88 | - int sample_rate = 16000, int feature_dim = 80, | ||
| 89 | - int num_threads = 2, bool debug = false, int max_active_paths = 4, | ||
| 90 | - int enable_endpoint=0,int rule1_min_trailing_silence=0, | ||
| 91 | - int rule2_min_trailing_silence=0,int rule3_min_utterance_length=0) | ||
| 92 | - { | ||
| 93 | - SherpaOnnxOnlineTransducer transducer = new SherpaOnnxOnlineTransducer(); | ||
| 94 | - SherpaOnnxOnlineModelConfig model_config = new SherpaOnnxOnlineModelConfig(); | ||
| 95 | - if (t is not null && t.GetType() == typeof(OnlineTransducer)) | ||
| 96 | - { | ||
| 97 | - OnlineTransducer? onlineTransducer = t as OnlineTransducer; | ||
| 98 | -#pragma warning disable CS8602 // 解引用可能出现空引用。 | ||
| 99 | - Trace.Assert(File.Exists(onlineTransducer.DecoderFilename) | ||
| 100 | - && File.Exists(onlineTransducer.EncoderFilename) | ||
| 101 | - && File.Exists(onlineTransducer.JoinerFilename), "Please provide a model"); | ||
| 102 | -#pragma warning restore CS8602 // 解引用可能出现空引用。 | ||
| 103 | - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); | ||
| 104 | - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); | ||
| 105 | - transducer.encoder_filename = onlineTransducer.EncoderFilename; | ||
| 106 | - transducer.decoder_filename = onlineTransducer.DecoderFilename; | ||
| 107 | - transducer.joiner_filename = onlineTransducer.JoinerFilename; | ||
| 108 | - } | ||
| 109 | - | ||
| 110 | - model_config.transducer = transducer; | ||
| 111 | - model_config.num_threads = num_threads; | ||
| 112 | - model_config.debug = debug; | ||
| 113 | - model_config.tokens = tokensFilePath; | ||
| 114 | - | ||
| 115 | - SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig(); | ||
| 116 | - feat_config.sample_rate = sample_rate; | ||
| 117 | - feat_config.feature_dim = feature_dim; | ||
| 118 | - | ||
| 119 | - SherpaOnnxOnlineRecognizerConfig sherpaOnnxOnlineRecognizerConfig; | ||
| 120 | - sherpaOnnxOnlineRecognizerConfig.decoding_method = decoding_method; | ||
| 121 | - sherpaOnnxOnlineRecognizerConfig.feat_config = feat_config; | ||
| 122 | - sherpaOnnxOnlineRecognizerConfig.model_config = model_config; | ||
| 123 | - sherpaOnnxOnlineRecognizerConfig.max_active_paths = max_active_paths; | ||
| 124 | - //endpoint | ||
| 125 | - sherpaOnnxOnlineRecognizerConfig.enable_endpoint = enable_endpoint; | ||
| 126 | - sherpaOnnxOnlineRecognizerConfig.rule1_min_trailing_silence = rule1_min_trailing_silence; | ||
| 127 | - sherpaOnnxOnlineRecognizerConfig.rule2_min_trailing_silence = rule2_min_trailing_silence; | ||
| 128 | - sherpaOnnxOnlineRecognizerConfig.rule3_min_utterance_length = rule3_min_utterance_length; | ||
| 129 | - | ||
| 130 | - _onlineRecognizer = | ||
| 131 | - SherpaOnnxSharp.CreateOnlineRecognizer(sherpaOnnxOnlineRecognizerConfig); | ||
| 132 | - } | ||
| 133 | - internal OnlineStream CreateOnlineStream() | ||
| 134 | - { | ||
| 135 | - SherpaOnnxOnlineStream stream = SherpaOnnxSharp.CreateOnlineStream(_onlineRecognizer); | ||
| 136 | - return new OnlineStream(stream); | ||
| 137 | - } | ||
| 138 | - public void InputFinished(OnlineStream stream) | ||
| 139 | - { | ||
| 140 | - SherpaOnnxSharp.InputFinished(stream._onlineStream); | ||
| 141 | - } | ||
| 142 | - public List<OnlineStream> CreateStreams(List<float[]> samplesList) | ||
| 143 | - { | ||
| 144 | - int batch_size = samplesList.Count; | ||
| 145 | - List<OnlineStream> streams = new List<OnlineStream>(); | ||
| 146 | - for (int i = 0; i < batch_size; i++) | ||
| 147 | - { | ||
| 148 | - OnlineStream stream = CreateOnlineStream(); | ||
| 149 | - AcceptWaveform(stream._onlineStream, 16000, samplesList[i]); | ||
| 150 | - InputFinished(stream); | ||
| 151 | - streams.Add(stream); | ||
| 152 | - } | ||
| 153 | - return streams; | ||
| 154 | - } | ||
| 155 | - public OnlineStream CreateStream() | ||
| 156 | - { | ||
| 157 | - OnlineStream stream = CreateOnlineStream(); | ||
| 158 | - return stream; | ||
| 159 | - } | ||
| 160 | - internal void AcceptWaveform(SherpaOnnxOnlineStream stream, int sample_rate, float[] samples) | ||
| 161 | - { | ||
| 162 | - SherpaOnnxSharp.AcceptOnlineWaveform(stream, sample_rate, samples, samples.Length); | ||
| 163 | - } | ||
| 164 | - public void AcceptWaveForm(OnlineStream stream, int sample_rate, float[] samples) | ||
| 165 | - { | ||
| 166 | - AcceptWaveform(stream._onlineStream, sample_rate, samples); | ||
| 167 | - } | ||
| 168 | - internal IntPtr GetStreamsIntPtr(OnlineStream[] streams) | ||
| 169 | - { | ||
| 170 | - int streams_len = streams.Length; | ||
| 171 | - int size = Marshal.SizeOf(typeof(SherpaOnnxOnlineStream)); | ||
| 172 | - IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len); | ||
| 173 | - unsafe | ||
| 174 | - { | ||
| 175 | - byte* ptrbds = (byte*)(streamsIntPtr.ToPointer()); | ||
| 176 | - for (int i = 0; i < streams_len; i++, ptrbds += (size)) | ||
| 177 | - { | ||
| 178 | - IntPtr streamIntptr = new IntPtr(ptrbds); | ||
| 179 | - Marshal.StructureToPtr(streams[i]._onlineStream, streamIntptr, false); | ||
| 180 | - } | ||
| 181 | - | ||
| 182 | - } | ||
| 183 | - return streamsIntPtr; | ||
| 184 | - } | ||
| 185 | - internal bool IsReady(OnlineStream stream) | ||
| 186 | - { | ||
| 187 | - return SherpaOnnxSharp.IsOnlineStreamReady(_onlineRecognizer, stream._onlineStream) != 0; | ||
| 188 | - } | ||
| 189 | - public void DecodeMultipleStreams(List<OnlineStream> streams) | ||
| 190 | - { | ||
| 191 | - while (true) | ||
| 192 | - { | ||
| 193 | - List<OnlineStream> streamList = new List<OnlineStream>(); | ||
| 194 | - foreach (OnlineStream stream in streams) | ||
| 195 | - { | ||
| 196 | - if (IsReady(stream)) | ||
| 197 | - { | ||
| 198 | - streamList.Add(stream); | ||
| 199 | - } | ||
| 200 | - } | ||
| 201 | - if (streamList.Count == 0) | ||
| 202 | - { | ||
| 203 | - break; | ||
| 204 | - } | ||
| 205 | - OnlineStream[] streamsBatch = new OnlineStream[streamList.Count]; | ||
| 206 | - for (int i = 0; i < streamsBatch.Length; i++) | ||
| 207 | - { | ||
| 208 | - streamsBatch[i] = streamList[i]; | ||
| 209 | - } | ||
| 210 | - streamList.Clear(); | ||
| 211 | - IntPtr streamsIntPtr = GetStreamsIntPtr(streamsBatch); | ||
| 212 | - SherpaOnnxSharp.DecodeMultipleOnlineStreams(_onlineRecognizer, streamsIntPtr, streamsBatch.Length); | ||
| 213 | - Marshal.FreeHGlobal(streamsIntPtr); | ||
| 214 | - } | ||
| 215 | - } | ||
| 216 | - public void DecodeStream(OnlineStream stream) | ||
| 217 | - { | ||
| 218 | - while (IsReady(stream)) | ||
| 219 | - { | ||
| 220 | - SherpaOnnxSharp.DecodeOnlineStream(_onlineRecognizer, stream._onlineStream); | ||
| 221 | - } | ||
| 222 | - } | ||
| 223 | - internal OnlineRecognizerResultEntity GetResult(SherpaOnnxOnlineStream stream) | ||
| 224 | - { | ||
| 225 | - IntPtr result_ip = SherpaOnnxSharp.GetOnlineStreamResult(_onlineRecognizer, stream); | ||
| 226 | - OnlineRecognizerResult onlineRecognizerResult = new OnlineRecognizerResult(result_ip); | ||
| 227 | -#pragma warning disable CS8605 // 取消装箱可能为 null 的值。 | ||
| 228 | - SherpaOnnxOnlineRecognizerResult result = | ||
| 229 | - (SherpaOnnxOnlineRecognizerResult)Marshal.PtrToStructure( | ||
| 230 | - onlineRecognizerResult._onlineRecognizerResult, typeof(SherpaOnnxOnlineRecognizerResult)); | ||
| 231 | -#pragma warning restore CS8605 // 取消装箱可能为 null 的值。 | ||
| 232 | - | ||
| 233 | -#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 | ||
| 234 | - string text = Marshal.PtrToStringAnsi(result.text); | ||
| 235 | -#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 | ||
| 236 | - OnlineRecognizerResultEntity onlineRecognizerResultEntity = | ||
| 237 | - new OnlineRecognizerResultEntity(); | ||
| 238 | - onlineRecognizerResultEntity.text = text; | ||
| 239 | - onlineRecognizerResultEntity.text_len = result.text_len; | ||
| 240 | - | ||
| 241 | - return onlineRecognizerResultEntity; | ||
| 242 | - } | ||
| 243 | - public OnlineRecognizerResultEntity GetResult(OnlineStream stream) | ||
| 244 | - { | ||
| 245 | - OnlineRecognizerResultEntity result = GetResult(stream._onlineStream); | ||
| 246 | - return result; | ||
| 247 | - } | ||
| 248 | - public List<OnlineRecognizerResultEntity> GetResults(List<OnlineStream> streams) | ||
| 249 | - { | ||
| 250 | - List<OnlineRecognizerResultEntity> results = new List<OnlineRecognizerResultEntity>(); | ||
| 251 | - foreach (OnlineStream stream in streams) | ||
| 252 | - { | ||
| 253 | - OnlineRecognizerResultEntity onlineRecognizerResultEntity = GetResult(stream._onlineStream); | ||
| 254 | - results.Add(onlineRecognizerResultEntity); | ||
| 255 | - } | ||
| 256 | - return results; | ||
| 257 | - } | ||
| 258 | - protected override void Dispose(bool disposing) | ||
| 259 | - { | ||
| 260 | - if (!disposing) | ||
| 261 | - { | ||
| 262 | - SherpaOnnxSharp.DestroyOnlineRecognizer(_onlineRecognizer); | ||
| 263 | - _onlineRecognizer.impl = IntPtr.Zero; | ||
| 264 | - this._disposed = true; | ||
| 265 | - base.Dispose(); | ||
| 266 | - } | ||
| 267 | - } | ||
| 268 | - } | ||
| 269 | - public class OfflineBase : IDisposable | ||
| 270 | - { | ||
| 271 | - public void Dispose() | ||
| 272 | - { | ||
| 273 | - Dispose(disposing: true); | ||
| 274 | - GC.SuppressFinalize(this); | ||
| 275 | - } | ||
| 276 | - protected virtual void Dispose(bool disposing) | ||
| 277 | - { | ||
| 278 | - if (!disposing) | ||
| 279 | - { | ||
| 280 | - if (_offlineRecognizerResult != IntPtr.Zero) | ||
| 281 | - { | ||
| 282 | - SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult); | ||
| 283 | - _offlineRecognizerResult = IntPtr.Zero; | ||
| 284 | - } | ||
| 285 | - if (_offlineStream.impl != IntPtr.Zero) | ||
| 286 | - { | ||
| 287 | - SherpaOnnxSharp.DestroyOfflineStream(_offlineStream); | ||
| 288 | - _offlineStream.impl = IntPtr.Zero; | ||
| 289 | - } | ||
| 290 | - if (_offlineRecognizer.impl != IntPtr.Zero) | ||
| 291 | - { | ||
| 292 | - SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer); | ||
| 293 | - _offlineRecognizer.impl = IntPtr.Zero; | ||
| 294 | - } | ||
| 295 | - this._disposed = true; | ||
| 296 | - } | ||
| 297 | - } | ||
| 298 | - ~OfflineBase() | ||
| 299 | - { | ||
| 300 | - Dispose(this._disposed); | ||
| 301 | - } | ||
| 302 | - internal SherpaOnnxOfflineStream _offlineStream; | ||
| 303 | - internal IntPtr _offlineRecognizerResult; | ||
| 304 | - internal SherpaOnnxOfflineRecognizer _offlineRecognizer; | ||
| 305 | - internal bool _disposed = false; | ||
| 306 | - } | ||
| 307 | - public class OfflineStream : OfflineBase | ||
| 308 | - { | ||
| 309 | - internal OfflineStream(SherpaOnnxOfflineStream offlineStream) | ||
| 310 | - { | ||
| 311 | - this._offlineStream = offlineStream; | ||
| 312 | - } | ||
| 313 | - | ||
| 314 | - protected override void Dispose(bool disposing) | ||
| 315 | - { | ||
| 316 | - if (!disposing) | ||
| 317 | - { | ||
| 318 | - SherpaOnnxSharp.DestroyOfflineStream(_offlineStream); | ||
| 319 | - _offlineStream.impl = IntPtr.Zero; | ||
| 320 | - this._disposed = true; | ||
| 321 | - base.Dispose(); | ||
| 322 | - } | ||
| 323 | - } | ||
| 324 | - } | ||
| 325 | - public class OfflineRecognizerResult : OfflineBase | ||
| 326 | - { | ||
| 327 | - internal OfflineRecognizerResult(IntPtr offlineRecognizerResult) | ||
| 328 | - { | ||
| 329 | - this._offlineRecognizerResult = offlineRecognizerResult; | ||
| 330 | - } | ||
| 331 | - protected override void Dispose(bool disposing) | ||
| 332 | - { | ||
| 333 | - if (!disposing) | ||
| 334 | - { | ||
| 335 | - SherpaOnnxSharp.DestroyOfflineRecognizerResult(_offlineRecognizerResult); | ||
| 336 | - _offlineRecognizerResult = IntPtr.Zero; | ||
| 337 | - this._disposed = true; | ||
| 338 | - base.Dispose(disposing); | ||
| 339 | - } | ||
| 340 | - } | ||
| 341 | - } | ||
| 342 | - public class OfflineRecognizer<T> : OfflineBase | ||
| 343 | - where T : class, new() | ||
| 344 | - { | ||
| 345 | - public OfflineRecognizer(T t, | ||
| 346 | - string tokensFilePath, string decoding_method = "greedy_search", | ||
| 347 | - int sample_rate = 16000, int feature_dim = 80, | ||
| 348 | - int num_threads = 2, bool debug = false) | ||
| 349 | - { | ||
| 350 | - SherpaOnnxOfflineTransducer transducer = new SherpaOnnxOfflineTransducer(); | ||
| 351 | - SherpaOnnxOfflineParaformer paraformer = new SherpaOnnxOfflineParaformer(); | ||
| 352 | - SherpaOnnxOfflineNemoEncDecCtc nemo_ctc = new SherpaOnnxOfflineNemoEncDecCtc(); | ||
| 353 | - SherpaOnnxOfflineModelConfig model_config = new SherpaOnnxOfflineModelConfig(); | ||
| 354 | - if (t is not null && t.GetType() == typeof(OfflineTransducer)) | ||
| 355 | - { | ||
| 356 | - OfflineTransducer? offlineTransducer = t as OfflineTransducer; | ||
| 357 | -#pragma warning disable CS8602 // 解引用可能出现空引用。 | ||
| 358 | - Trace.Assert(File.Exists(offlineTransducer.DecoderFilename) | ||
| 359 | - && File.Exists(offlineTransducer.EncoderFilename) | ||
| 360 | - && File.Exists(offlineTransducer.JoinerFilename), "Please provide a model"); | ||
| 361 | -#pragma warning restore CS8602 // 解引用可能出现空引用。 | ||
| 362 | - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); | ||
| 363 | - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); | ||
| 364 | - transducer.encoder_filename = offlineTransducer.EncoderFilename; | ||
| 365 | - transducer.decoder_filename = offlineTransducer.DecoderFilename; | ||
| 366 | - transducer.joiner_filename = offlineTransducer.JoinerFilename; | ||
| 367 | - } | ||
| 368 | - else if (t is not null && t.GetType() == typeof(OfflineParaformer)) | ||
| 369 | - { | ||
| 370 | - OfflineParaformer? offlineParaformer = t as OfflineParaformer; | ||
| 371 | -#pragma warning disable CS8602 // 解引用可能出现空引用。 | ||
| 372 | - Trace.Assert(File.Exists(offlineParaformer.Model), "Please provide a model"); | ||
| 373 | -#pragma warning restore CS8602 // 解引用可能出现空引用。 | ||
| 374 | - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); | ||
| 375 | - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); | ||
| 376 | - paraformer.model = offlineParaformer.Model; | ||
| 377 | - } | ||
| 378 | - else if (t is not null && t.GetType() == typeof(OfflineNemoEncDecCtc)) | ||
| 379 | - { | ||
| 380 | - OfflineNemoEncDecCtc? offlineNemoEncDecCtc = t as OfflineNemoEncDecCtc; | ||
| 381 | -#pragma warning disable CS8602 // 解引用可能出现空引用。 | ||
| 382 | - Trace.Assert(File.Exists(offlineNemoEncDecCtc.Model), "Please provide a model"); | ||
| 383 | -#pragma warning restore CS8602 // 解引用可能出现空引用。 | ||
| 384 | - Trace.Assert(File.Exists(tokensFilePath), "Please provide a tokens"); | ||
| 385 | - Trace.Assert(num_threads > 0, "num_threads must be greater than 0"); | ||
| 386 | - nemo_ctc.model = offlineNemoEncDecCtc.Model; | ||
| 387 | - } | ||
| 388 | - | ||
| 389 | - model_config.transducer = transducer; | ||
| 390 | - model_config.paraformer = paraformer; | ||
| 391 | - model_config.nemo_ctc = nemo_ctc; | ||
| 392 | - model_config.num_threads = num_threads; | ||
| 393 | - model_config.debug = debug; | ||
| 394 | - model_config.tokens = tokensFilePath; | ||
| 395 | - | ||
| 396 | - SherpaOnnxFeatureConfig feat_config = new SherpaOnnxFeatureConfig(); | ||
| 397 | - feat_config.sample_rate = sample_rate; | ||
| 398 | - feat_config.feature_dim = feature_dim; | ||
| 399 | - | ||
| 400 | - SherpaOnnxOfflineRecognizerConfig sherpaOnnxOfflineRecognizerConfig; | ||
| 401 | - sherpaOnnxOfflineRecognizerConfig.decoding_method = decoding_method; | ||
| 402 | - sherpaOnnxOfflineRecognizerConfig.feat_config = feat_config; | ||
| 403 | - sherpaOnnxOfflineRecognizerConfig.model_config = model_config; | ||
| 404 | - | ||
| 405 | - _offlineRecognizer = | ||
| 406 | - SherpaOnnxSharp.CreateOfflineRecognizer(sherpaOnnxOfflineRecognizerConfig); | ||
| 407 | - } | ||
| 408 | - internal OfflineStream CreateOfflineStream() | ||
| 409 | - { | ||
| 410 | - SherpaOnnxOfflineStream stream = SherpaOnnxSharp.CreateOfflineStream(_offlineRecognizer); | ||
| 411 | - return new OfflineStream(stream); | ||
| 412 | - } | ||
| 413 | - public OfflineStream[] CreateOfflineStream(List<float[]> samplesList) | ||
| 414 | - { | ||
| 415 | - int batch_size = samplesList.Count; | ||
| 416 | - OfflineStream[] streams = new OfflineStream[batch_size]; | ||
| 417 | - List<string> wavFiles = new List<string>(); | ||
| 418 | - for (int i = 0; i < batch_size; i++) | ||
| 419 | - { | ||
| 420 | - OfflineStream stream = CreateOfflineStream(); | ||
| 421 | - AcceptWaveform(stream._offlineStream, 16000, samplesList[i]); | ||
| 422 | - streams[i] = stream; | ||
| 423 | - } | ||
| 424 | - return streams; | ||
| 425 | - } | ||
| 426 | - internal void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples) | ||
| 427 | - { | ||
| 428 | - SherpaOnnxSharp.AcceptWaveform(stream, sample_rate, samples, samples.Length); | ||
| 429 | - } | ||
| 430 | - internal IntPtr GetStreamsIntPtr(OfflineStream[] streams) | ||
| 431 | - { | ||
| 432 | - int streams_len = streams.Length; | ||
| 433 | - int size = Marshal.SizeOf(typeof(SherpaOnnxOfflineStream)); | ||
| 434 | - IntPtr streamsIntPtr = Marshal.AllocHGlobal(size * streams_len); | ||
| 435 | - unsafe | ||
| 436 | - { | ||
| 437 | - byte* ptrbds = (byte*)(streamsIntPtr.ToPointer()); | ||
| 438 | - for (int i = 0; i < streams_len; i++, ptrbds += (size)) | ||
| 439 | - { | ||
| 440 | - IntPtr streamIntptr = new IntPtr(ptrbds); | ||
| 441 | - Marshal.StructureToPtr(streams[i]._offlineStream, streamIntptr, false); | ||
| 442 | - } | ||
| 443 | - } | ||
| 444 | - return streamsIntPtr; | ||
| 445 | - } | ||
| 446 | - public void DecodeMultipleOfflineStreams(OfflineStream[] streams) | ||
| 447 | - { | ||
| 448 | - IntPtr streamsIntPtr = GetStreamsIntPtr(streams); | ||
| 449 | - SherpaOnnxSharp.DecodeMultipleOfflineStreams(_offlineRecognizer, streamsIntPtr, streams.Length); | ||
| 450 | - Marshal.FreeHGlobal(streamsIntPtr); | ||
| 451 | - } | ||
| 452 | - internal OfflineRecognizerResultEntity GetResult(SherpaOnnxOfflineStream stream) | ||
| 453 | - { | ||
| 454 | - IntPtr result_ip = SherpaOnnxSharp.GetOfflineStreamResult(stream); | ||
| 455 | - OfflineRecognizerResult offlineRecognizerResult = new OfflineRecognizerResult(result_ip); | ||
| 456 | -#pragma warning disable CS8605 // 取消装箱可能为 null 的值。 | ||
| 457 | - SherpaOnnxOfflineRecognizerResult result = | ||
| 458 | - (SherpaOnnxOfflineRecognizerResult)Marshal.PtrToStructure( | ||
| 459 | - offlineRecognizerResult._offlineRecognizerResult, typeof(SherpaOnnxOfflineRecognizerResult)); | ||
| 460 | -#pragma warning restore CS8605 // 取消装箱可能为 null 的值。 | ||
| 461 | - | ||
| 462 | -#pragma warning disable CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 | ||
| 463 | - string text = Marshal.PtrToStringAnsi(result.text); | ||
| 464 | -#pragma warning restore CS8600 // 将 null 字面量或可能为 null 的值转换为非 null 类型。 | ||
| 465 | - OfflineRecognizerResultEntity offlineRecognizerResultEntity = | ||
| 466 | - new OfflineRecognizerResultEntity(); | ||
| 467 | - offlineRecognizerResultEntity.text = text; | ||
| 468 | - offlineRecognizerResultEntity.text_len = result.text_len; | ||
| 469 | - | ||
| 470 | - return offlineRecognizerResultEntity; | ||
| 471 | - } | ||
| 472 | - public List<OfflineRecognizerResultEntity> GetResults(OfflineStream[] streams) | ||
| 473 | - { | ||
| 474 | - List<OfflineRecognizerResultEntity> results = new List<OfflineRecognizerResultEntity>(); | ||
| 475 | - foreach (OfflineStream stream in streams) | ||
| 476 | - { | ||
| 477 | - OfflineRecognizerResultEntity offlineRecognizerResultEntity = GetResult(stream._offlineStream); | ||
| 478 | - results.Add(offlineRecognizerResultEntity); | ||
| 479 | - } | ||
| 480 | - return results; | ||
| 481 | - } | ||
| 482 | - protected override void Dispose(bool disposing) | ||
| 483 | - { | ||
| 484 | - if (!disposing) | ||
| 485 | - { | ||
| 486 | - SherpaOnnxSharp.DestroyOfflineRecognizer(_offlineRecognizer); | ||
| 487 | - _offlineRecognizer.impl = IntPtr.Zero; | ||
| 488 | - this._disposed = true; | ||
| 489 | - base.Dispose(); | ||
| 490 | - } | ||
| 491 | - } | ||
| 492 | - } | ||
| 493 | - internal static partial class SherpaOnnxSharp | ||
| 494 | - { | ||
| 495 | - private const string dllName = @"SherpaOnnxSharp"; | ||
| 496 | - | ||
| 497 | - [DllImport(dllName, EntryPoint = "CreateOfflineRecognizer", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] | ||
| 498 | - internal static extern SherpaOnnxOfflineRecognizer CreateOfflineRecognizer(SherpaOnnxOfflineRecognizerConfig config); | ||
| 499 | - | ||
| 500 | - [DllImport(dllName, EntryPoint = "CreateOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] | ||
| 501 | - internal static extern SherpaOnnxOfflineStream CreateOfflineStream(SherpaOnnxOfflineRecognizer offlineRecognizer); | ||
| 502 | - | ||
| 503 | - [DllImport(dllName, EntryPoint = "AcceptWaveform", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] | ||
| 504 | - internal static extern void AcceptWaveform(SherpaOnnxOfflineStream stream, int sample_rate, float[] samples, int samples_size); | ||
| 505 | - | ||
| 506 | - [DllImport(dllName, EntryPoint = "DecodeOfflineStream", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] | ||
| 507 | - internal static extern void DecodeOfflineStream(SherpaOnnxOfflineRecognizer recognizer, SherpaOnnxOfflineStream stream); | ||
| 508 | - | ||
| 509 | - [DllImport(dllName, EntryPoint = "DecodeMultipleOfflineStreams", CharSet = CharSet.Ansi, CallingConvention = CallingConvention.Cdecl)] | ||
| 510 | - internal static extern void DecodeMultipleOfflineStreams(SherpaOnnxOfflineRecognizer recognizer, IntPtr | ||
| 511 | - streams, int n); | ||
| 512 | - | ||
| 513 | - [DllImport(dllName, EntryPoint = "GetOfflineStreamResult", CallingConvention = CallingConvention.Cdecl)] | ||
| 514 | - internal static extern IntPtr GetOfflineStreamResult(SherpaOnnxOfflineStream stream); | ||
| 515 | - | ||
| 516 | - [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizerResult", CallingConvention = CallingConvention.Cdecl)] | ||
| 517 | - internal static extern void DestroyOfflineRecognizerResult(IntPtr result); | ||
| 518 | - | ||
| 519 | - [DllImport(dllName, EntryPoint = "DestroyOfflineStream", CallingConvention = CallingConvention.Cdecl)] | ||
| 520 | - internal static extern void DestroyOfflineStream(SherpaOnnxOfflineStream stream); | ||
| 521 | - | ||
| 522 | - [DllImport(dllName, EntryPoint = "DestroyOfflineRecognizer", CallingConvention = CallingConvention.Cdecl)] | ||
| 523 | - internal static extern void DestroyOfflineRecognizer(SherpaOnnxOfflineRecognizer offlineRecognizer); | ||
| 524 | - | ||
| 525 | - [DllImport(dllName, EntryPoint = "CreateOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] | ||
| 526 | - internal static extern SherpaOnnxOnlineRecognizer CreateOnlineRecognizer(SherpaOnnxOnlineRecognizerConfig config); | ||
| 527 | - | ||
| 528 | - /// Free a pointer returned by CreateOnlineRecognizer() | ||
| 529 | - /// | ||
| 530 | - /// @param p A pointer returned by CreateOnlineRecognizer() | ||
| 531 | - [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizer", CallingConvention = CallingConvention.Cdecl)] | ||
| 532 | - internal static extern void DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer recognizer); | ||
| 533 | - | ||
| 534 | - /// Create an online stream for accepting wave samples. | ||
| 535 | - /// | ||
| 536 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 537 | - /// @return Return a pointer to an OnlineStream. The user has to invoke | ||
| 538 | - /// DestroyOnlineStream() to free it to avoid memory leak. | ||
| 539 | - [DllImport(dllName, EntryPoint = "CreateOnlineStream", CallingConvention = CallingConvention.Cdecl)] | ||
| 540 | - internal static extern SherpaOnnxOnlineStream CreateOnlineStream( | ||
| 541 | - SherpaOnnxOnlineRecognizer recognizer); | ||
| 542 | - | ||
| 543 | - /// Destroy an online stream. | ||
| 544 | - /// | ||
| 545 | - /// @param stream A pointer returned by CreateOnlineStream() | ||
| 546 | - [DllImport(dllName, EntryPoint = "DestroyOnlineStream", CallingConvention = CallingConvention.Cdecl)] | ||
| 547 | - internal static extern void DestroyOnlineStream(SherpaOnnxOnlineStream stream); | ||
| 548 | - | ||
| 549 | - /// Accept input audio samples and compute the features. | ||
| 550 | - /// The user has to invoke DecodeOnlineStream() to run the neural network and | ||
| 551 | - /// decoding. | ||
| 552 | - /// | ||
| 553 | - /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 554 | - /// @param sample_rate Sample rate of the input samples. If it is different | ||
| 555 | - /// from config.feat_config.sample_rate, we will do | ||
| 556 | - /// resampling inside sherpa-onnx. | ||
| 557 | - /// @param samples A pointer to a 1-D array containing audio samples. | ||
| 558 | - /// The range of samples has to be normalized to [-1, 1]. | ||
| 559 | - /// @param n Number of elements in the samples array. | ||
| 560 | - [DllImport(dllName, EntryPoint = "AcceptOnlineWaveform", CallingConvention = CallingConvention.Cdecl)] | ||
| 561 | - internal static extern void AcceptOnlineWaveform(SherpaOnnxOnlineStream stream, int sample_rate, | ||
| 562 | - float[] samples, int n); | ||
| 563 | - | ||
| 564 | - /// Return 1 if there are enough number of feature frames for decoding. | ||
| 565 | - /// Return 0 otherwise. | ||
| 566 | - /// | ||
| 567 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer | ||
| 568 | - /// @param stream A pointer returned by CreateOnlineStream | ||
| 569 | - [DllImport(dllName, EntryPoint = "IsOnlineStreamReady", CallingConvention = CallingConvention.Cdecl)] | ||
| 570 | - internal static extern int IsOnlineStreamReady(SherpaOnnxOnlineRecognizer recognizer, | ||
| 571 | - SherpaOnnxOnlineStream stream); | ||
| 572 | - | ||
| 573 | - /// Call this function to run the neural network model and decoding. | ||
| 574 | - // | ||
| 575 | - /// Precondition for this function: IsOnlineStreamReady() MUST return 1. | ||
| 576 | - /// | ||
| 577 | - /// Usage example: | ||
| 578 | - /// | ||
| 579 | - /// while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 580 | - /// DecodeOnlineStream(recognizer, stream); | ||
| 581 | - /// } | ||
| 582 | - /// | ||
| 583 | - [DllImport(dllName, EntryPoint = "DecodeOnlineStream", CallingConvention = CallingConvention.Cdecl)] | ||
| 584 | - internal static extern void DecodeOnlineStream(SherpaOnnxOnlineRecognizer recognizer, | ||
| 585 | - SherpaOnnxOnlineStream stream); | ||
| 586 | - | ||
| 587 | - /// This function is similar to DecodeOnlineStream(). It decodes multiple | ||
| 588 | - /// OnlineStream in parallel. | ||
| 589 | - /// | ||
| 590 | - /// Caution: The caller has to ensure each OnlineStream is ready, i.e., | ||
| 591 | - /// IsOnlineStreamReady() for that stream should return 1. | ||
| 592 | - /// | ||
| 593 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 594 | - /// @param streams A pointer array containing pointers returned by | ||
| 595 | - /// CreateOnlineRecognizer() | ||
| 596 | - /// @param n Number of elements in the given streams array. | ||
| 597 | - [DllImport(dllName, EntryPoint = "DecodeMultipleOnlineStreams", CallingConvention = CallingConvention.Cdecl)] | ||
| 598 | - internal static extern void DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer recognizer, | ||
| 599 | - IntPtr streams, int n); | ||
| 600 | - | ||
| 601 | - /// Get the decoding results so far for an OnlineStream. | ||
| 602 | - /// | ||
| 603 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 604 | - /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 605 | - /// @return A pointer containing the result. The user has to invoke | ||
| 606 | - /// DestroyOnlineRecognizerResult() to free the returned pointer to | ||
| 607 | - /// avoid memory leak. | ||
| 608 | - [DllImport(dllName, EntryPoint = "GetOnlineStreamResult", CallingConvention = CallingConvention.Cdecl)] | ||
| 609 | - internal static extern IntPtr GetOnlineStreamResult( | ||
| 610 | - SherpaOnnxOnlineRecognizer recognizer, SherpaOnnxOnlineStream stream); | ||
| 611 | - | ||
| 612 | - /// Destroy the pointer returned by GetOnlineStreamResult(). | ||
| 613 | - /// | ||
| 614 | - /// @param r A pointer returned by GetOnlineStreamResult() | ||
| 615 | - [DllImport(dllName, EntryPoint = "DestroyOnlineRecognizerResult", CallingConvention = CallingConvention.Cdecl)] | ||
| 616 | - internal static extern void DestroyOnlineRecognizerResult(IntPtr result); | ||
| 617 | - | ||
| 618 | - /// Reset an OnlineStream , which clears the neural network model state | ||
| 619 | - /// and the state for decoding. | ||
| 620 | - /// | ||
| 621 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 622 | - /// @param stream A pointer returned by CreateOnlineStream | ||
| 623 | - [DllImport(dllName, EntryPoint = "Reset", CallingConvention = CallingConvention.Cdecl)] | ||
| 624 | - internal static extern void Reset(SherpaOnnxOnlineRecognizer recognizer, | ||
| 625 | - SherpaOnnxOnlineStream stream); | ||
| 626 | - | ||
| 627 | - /// Signal that no more audio samples would be available. | ||
| 628 | - /// After this call, you cannot call AcceptWaveform() any more. | ||
| 629 | - /// | ||
| 630 | - /// @param stream A pointer returned by CreateOnlineStream() | ||
| 631 | - [DllImport(dllName, EntryPoint = "InputFinished", CallingConvention = CallingConvention.Cdecl)] | ||
| 632 | - internal static extern void InputFinished(SherpaOnnxOnlineStream stream); | ||
| 633 | - | ||
| 634 | - /// Return 1 if an endpoint has been detected. | ||
| 635 | - /// | ||
| 636 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 637 | - /// @param stream A pointer returned by CreateOnlineStream() | ||
| 638 | - /// @return Return 1 if an endpoint is detected. Return 0 otherwise. | ||
| 639 | - [DllImport(dllName, EntryPoint = "IsEndpoint", CallingConvention = CallingConvention.Cdecl)] | ||
| 640 | - internal static extern int IsEndpoint(SherpaOnnxOnlineRecognizer recognizer, | ||
| 641 | - SherpaOnnxOnlineStream stream); | ||
| 642 | - } | ||
| 643 | - internal struct SherpaOnnxOfflineTransducer | ||
| 644 | - { | ||
| 645 | - public string encoder_filename; | ||
| 646 | - public string decoder_filename; | ||
| 647 | - public string joiner_filename; | ||
| 648 | - public SherpaOnnxOfflineTransducer() | ||
| 649 | - { | ||
| 650 | - encoder_filename = ""; | ||
| 651 | - decoder_filename = ""; | ||
| 652 | - joiner_filename = ""; | ||
| 653 | - } | ||
| 654 | - }; | ||
| 655 | - internal struct SherpaOnnxOfflineParaformer | ||
| 656 | - { | ||
| 657 | - public string model; | ||
| 658 | - public SherpaOnnxOfflineParaformer() | ||
| 659 | - { | ||
| 660 | - model = ""; | ||
| 661 | - } | ||
| 662 | - }; | ||
| 663 | - internal struct SherpaOnnxOfflineNemoEncDecCtc | ||
| 664 | - { | ||
| 665 | - public string model; | ||
| 666 | - public SherpaOnnxOfflineNemoEncDecCtc() | ||
| 667 | - { | ||
| 668 | - model = ""; | ||
| 669 | - } | ||
| 670 | - }; | ||
| 671 | - internal struct SherpaOnnxOfflineModelConfig | ||
| 672 | - { | ||
| 673 | - public SherpaOnnxOfflineTransducer transducer; | ||
| 674 | - public SherpaOnnxOfflineParaformer paraformer; | ||
| 675 | - public SherpaOnnxOfflineNemoEncDecCtc nemo_ctc; | ||
| 676 | - public string tokens; | ||
| 677 | - public int num_threads; | ||
| 678 | - public bool debug; | ||
| 679 | - }; | ||
| 680 | - /// It expects 16 kHz 16-bit single channel wave format. | ||
| 681 | - internal struct SherpaOnnxFeatureConfig | ||
| 682 | - { | ||
| 683 | - /// Sample rate of the input data. MUST match the one expected | ||
| 684 | - /// by the model. For instance, it should be 16000 for models provided | ||
| 685 | - /// by us. | ||
| 686 | - public int sample_rate; | ||
| 687 | - | ||
| 688 | - /// Feature dimension of the model. | ||
| 689 | - /// For instance, it should be 80 for models provided by us. | ||
| 690 | - public int feature_dim; | ||
| 691 | - }; | ||
| 692 | - internal struct SherpaOnnxOfflineRecognizerConfig | ||
| 693 | - { | ||
| 694 | - public SherpaOnnxFeatureConfig feat_config; | ||
| 695 | - public SherpaOnnxOfflineModelConfig model_config; | ||
| 696 | - | ||
| 697 | - /// Possible values are: greedy_search, modified_beam_search | ||
| 698 | - public string decoding_method; | ||
| 699 | - | ||
| 700 | - }; | ||
| 701 | - internal struct SherpaOnnxOfflineRecognizer | ||
| 702 | - { | ||
| 703 | - public IntPtr impl; | ||
| 704 | - }; | ||
| 705 | - [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] | ||
| 706 | - internal struct SherpaOnnxOfflineStream | ||
| 707 | - { | ||
| 708 | - public IntPtr impl; | ||
| 709 | - }; | ||
| 710 | - internal struct SherpaOnnxOfflineRecognizerResult | ||
| 711 | - { | ||
| 712 | - public IntPtr text; | ||
| 713 | - public int text_len; | ||
| 714 | - } | ||
| 715 | - internal struct SherpaOnnxOnlineTransducer | ||
| 716 | - { | ||
| 717 | - public string encoder_filename; | ||
| 718 | - public string decoder_filename; | ||
| 719 | - public string joiner_filename; | ||
| 720 | - public SherpaOnnxOnlineTransducer() | ||
| 721 | - { | ||
| 722 | - encoder_filename = string.Empty; | ||
| 723 | - decoder_filename = string.Empty; | ||
| 724 | - joiner_filename = string.Empty; | ||
| 725 | - } | ||
| 726 | - }; | ||
| 727 | - internal struct SherpaOnnxOnlineModelConfig | ||
| 728 | - { | ||
| 729 | - public SherpaOnnxOnlineTransducer transducer; | ||
| 730 | - public string tokens; | ||
| 731 | - public int num_threads; | ||
| 732 | - public bool debug; // true to print debug information of the model | ||
| 733 | - }; | ||
| 734 | - internal struct SherpaOnnxOnlineRecognizerConfig | ||
| 735 | - { | ||
| 736 | - public SherpaOnnxFeatureConfig feat_config; | ||
| 737 | - public SherpaOnnxOnlineModelConfig model_config; | ||
| 738 | - | ||
| 739 | - /// Possible values are: greedy_search, modified_beam_search | ||
| 740 | - public string decoding_method; | ||
| 741 | - | ||
| 742 | - /// Used only when decoding_method is modified_beam_search | ||
| 743 | - /// Example value: 4 | ||
| 744 | - public int max_active_paths; | ||
| 745 | - | ||
| 746 | - /// 0 to disable endpoint detection. | ||
| 747 | - /// A non-zero value to enable endpoint detection. | ||
| 748 | - public int enable_endpoint; | ||
| 749 | - | ||
| 750 | - /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 751 | - /// this value even if nothing has been decoded. | ||
| 752 | - /// Used only when enable_endpoint is not 0. | ||
| 753 | - public float rule1_min_trailing_silence; | ||
| 754 | - | ||
| 755 | - /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 756 | - /// this value after something that is not blank has been decoded. | ||
| 757 | - /// Used only when enable_endpoint is not 0. | ||
| 758 | - public float rule2_min_trailing_silence; | ||
| 759 | - | ||
| 760 | - /// An endpoint is detected if the utterance in seconds is larger than | ||
| 761 | - /// this value. | ||
| 762 | - /// Used only when enable_endpoint is not 0. | ||
| 763 | - public float rule3_min_utterance_length; | ||
| 764 | - }; | ||
| 765 | - internal struct SherpaOnnxOnlineRecognizerResult | ||
| 766 | - { | ||
| 767 | - public IntPtr text; | ||
| 768 | - public int text_len; | ||
| 769 | - // TODO: Add more fields | ||
| 770 | - } | ||
| 771 | - internal struct SherpaOnnxOnlineRecognizer | ||
| 772 | - { | ||
| 773 | - public IntPtr impl; | ||
| 774 | - }; | ||
| 775 | - [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi, Pack = 1)] | ||
| 776 | - internal struct SherpaOnnxOnlineStream | ||
| 777 | - { | ||
| 778 | - public IntPtr impl; | ||
| 779 | - }; | ||
| 780 | - public class OfflineNemoEncDecCtc | ||
| 781 | - { | ||
| 782 | - private string model = string.Empty; | ||
| 783 | - public string Model { get => model; set => model = value; } | ||
| 784 | - } | ||
| 785 | - public class OfflineParaformer | ||
| 786 | - { | ||
| 787 | - private string model = string.Empty; | ||
| 788 | - public string Model { get => model; set => model = value; } | ||
| 789 | - } | ||
| 790 | - public class OfflineRecognizerResultEntity | ||
| 791 | - { | ||
| 792 | - /// <summary> | ||
| 793 | - /// recognizer result | ||
| 794 | - /// </summary> | ||
| 795 | - public string? text { get; set; } | ||
| 796 | - /// <summary> | ||
| 797 | - /// recognizer result length | ||
| 798 | - /// </summary> | ||
| 799 | - public int text_len { get; set; } | ||
| 800 | - /// <summary> | ||
| 801 | - /// decode tokens | ||
| 802 | - /// </summary> | ||
| 803 | - public List<string>? tokens { get; set; } | ||
| 804 | - /// <summary> | ||
| 805 | - /// timestamps | ||
| 806 | - /// </summary> | ||
| 807 | - public List<float>? timestamps { get; set; } | ||
| 808 | - } | ||
| 809 | - public class OfflineTransducer | ||
| 810 | - { | ||
| 811 | - private string encoderFilename = string.Empty; | ||
| 812 | - private string decoderFilename = string.Empty; | ||
| 813 | - private string joinerFilename = string.Empty; | ||
| 814 | - public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; } | ||
| 815 | - public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; } | ||
| 816 | - public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; } | ||
| 817 | - } | ||
| 818 | - public class OnlineEndpoint | ||
| 819 | - { | ||
| 820 | - /// 0 to disable endpoint detection. | ||
| 821 | - /// A non-zero value to enable endpoint detection. | ||
| 822 | - private int enableEndpoint; | ||
| 823 | - | ||
| 824 | - /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 825 | - /// this value even if nothing has been decoded. | ||
| 826 | - /// Used only when enable_endpoint is not 0. | ||
| 827 | - private float rule1MinTrailingSilence; | ||
| 828 | - | ||
| 829 | - /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 830 | - /// this value after something that is not blank has been decoded. | ||
| 831 | - /// Used only when enable_endpoint is not 0. | ||
| 832 | - private float rule2MinTrailingSilence; | ||
| 833 | - | ||
| 834 | - /// An endpoint is detected if the utterance in seconds is larger than | ||
| 835 | - /// this value. | ||
| 836 | - /// Used only when enable_endpoint is not 0. | ||
| 837 | - private float rule3MinUtteranceLength; | ||
| 838 | - | ||
| 839 | - public int EnableEndpoint { get => enableEndpoint; set => enableEndpoint = value; } | ||
| 840 | - public float Rule1MinTrailingSilence { get => rule1MinTrailingSilence; set => rule1MinTrailingSilence = value; } | ||
| 841 | - public float Rule2MinTrailingSilence { get => rule2MinTrailingSilence; set => rule2MinTrailingSilence = value; } | ||
| 842 | - public float Rule3MinUtteranceLength { get => rule3MinUtteranceLength; set => rule3MinUtteranceLength = value; } | ||
| 843 | - } | ||
| 844 | - public class OnlineRecognizerResultEntity | ||
| 845 | - { | ||
| 846 | - /// <summary> | ||
| 847 | - /// recognizer result | ||
| 848 | - /// </summary> | ||
| 849 | - public string? text { get; set; } | ||
| 850 | - /// <summary> | ||
| 851 | - /// recognizer result length | ||
| 852 | - /// </summary> | ||
| 853 | - public int text_len { get; set; } | ||
| 854 | - /// <summary> | ||
| 855 | - /// decode tokens | ||
| 856 | - /// </summary> | ||
| 857 | - public List<string>? tokens { get; set; } | ||
| 858 | - /// <summary> | ||
| 859 | - /// timestamps | ||
| 860 | - /// </summary> | ||
| 861 | - public List<float>? timestamps { get; set; } | ||
| 862 | - } | ||
| 863 | - public class OnlineTransducer | ||
| 864 | - { | ||
| 865 | - private string encoderFilename = string.Empty; | ||
| 866 | - private string decoderFilename = string.Empty; | ||
| 867 | - private string joinerFilename = string.Empty; | ||
| 868 | - public string EncoderFilename { get => encoderFilename; set => encoderFilename = value; } | ||
| 869 | - public string DecoderFilename { get => decoderFilename; set => decoderFilename = value; } | ||
| 870 | - public string JoinerFilename { get => joinerFilename; set => joinerFilename = value; } | ||
| 871 | - } | ||
| 872 | -} |
sherpa-onnx/csharp-api/offline-api.cpp
已删除
100644 → 0
| 1 | -// sherpa-onnx/sharp-api/offline-api.cpp | ||
| 2 | -// | ||
| 3 | -// Copyright (c) 2023 Manyeyes Corporation | ||
| 4 | - | ||
| 5 | -#include "offline-api.h" | ||
| 6 | - | ||
| 7 | -#include "sherpa-onnx/csrc/display.h" | ||
| 8 | -#include "sherpa-onnx/csrc/offline-recognizer.h" | ||
| 9 | - | ||
| 10 | -namespace sherpa_onnx | ||
| 11 | -{ | ||
| 12 | - struct SherpaOnnxOfflineRecognizer { | ||
| 13 | - sherpa_onnx::OfflineRecognizer* impl; | ||
| 14 | - }; | ||
| 15 | - | ||
| 16 | - struct SherpaOnnxOfflineStream { | ||
| 17 | - std::unique_ptr<sherpa_onnx::OfflineStream> impl; | ||
| 18 | - explicit SherpaOnnxOfflineStream(std::unique_ptr<sherpa_onnx::OfflineStream> p) | ||
| 19 | - : impl(std::move(p)) {} | ||
| 20 | - }; | ||
| 21 | - | ||
| 22 | - struct SherpaOnnxDisplay { | ||
| 23 | - std::unique_ptr<sherpa_onnx::Display> impl; | ||
| 24 | - }; | ||
| 25 | - | ||
| 26 | - SherpaOnnxOfflineRecognizer* __stdcall CreateOfflineRecognizer( | ||
| 27 | - const SherpaOnnxOfflineRecognizerConfig* config) { | ||
| 28 | - sherpa_onnx::OfflineRecognizerConfig recognizer_config; | ||
| 29 | - | ||
| 30 | - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; | ||
| 31 | - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; | ||
| 32 | - | ||
| 33 | - if (strlen(config->model_config.transducer.encoder_filename) > 0) { | ||
| 34 | - recognizer_config.model_config.transducer.encoder_filename = | ||
| 35 | - config->model_config.transducer.encoder_filename; | ||
| 36 | - recognizer_config.model_config.transducer.decoder_filename = | ||
| 37 | - config->model_config.transducer.decoder_filename; | ||
| 38 | - recognizer_config.model_config.transducer.joiner_filename = | ||
| 39 | - config->model_config.transducer.joiner_filename; | ||
| 40 | - } | ||
| 41 | - else if (strlen(config->model_config.paraformer.model) > 0) { | ||
| 42 | - recognizer_config.model_config.paraformer.model = | ||
| 43 | - config->model_config.paraformer.model; | ||
| 44 | - } | ||
| 45 | - else if (strlen(config->model_config.nemo_ctc.model) > 0) { | ||
| 46 | - recognizer_config.model_config.nemo_ctc.model = | ||
| 47 | - config->model_config.nemo_ctc.model; | ||
| 48 | - } | ||
| 49 | - | ||
| 50 | - recognizer_config.model_config.tokens = | ||
| 51 | - config->model_config.tokens; | ||
| 52 | - recognizer_config.model_config.num_threads = | ||
| 53 | - config->model_config.num_threads; | ||
| 54 | - recognizer_config.model_config.debug = | ||
| 55 | - config->model_config.debug; | ||
| 56 | - | ||
| 57 | - recognizer_config.decoding_method = config->decoding_method; | ||
| 58 | - | ||
| 59 | - SherpaOnnxOfflineRecognizer* recognizer = | ||
| 60 | - new SherpaOnnxOfflineRecognizer; | ||
| 61 | - recognizer->impl = | ||
| 62 | - new sherpa_onnx::OfflineRecognizer(recognizer_config); | ||
| 63 | - | ||
| 64 | - return recognizer; | ||
| 65 | - } | ||
| 66 | - | ||
| 67 | - SherpaOnnxOfflineStream* __stdcall CreateOfflineStream( | ||
| 68 | - SherpaOnnxOfflineRecognizer* recognizer) { | ||
| 69 | - SherpaOnnxOfflineStream* stream = | ||
| 70 | - new SherpaOnnxOfflineStream(recognizer->impl->CreateStream()); | ||
| 71 | - return stream; | ||
| 72 | - } | ||
| 73 | - | ||
| 74 | - void __stdcall AcceptWaveform( | ||
| 75 | - SherpaOnnxOfflineStream* stream, | ||
| 76 | - int32_t sample_rate, | ||
| 77 | - const float* samples, int32_t samples_size) { | ||
| 78 | - std::vector<float> waveform{ samples, samples + samples_size }; | ||
| 79 | - stream->impl->AcceptWaveform(sample_rate, waveform.data(), waveform.size()); | ||
| 80 | - } | ||
| 81 | - | ||
| 82 | - void __stdcall DecodeOfflineStream( | ||
| 83 | - SherpaOnnxOfflineRecognizer* recognizer, | ||
| 84 | - SherpaOnnxOfflineStream* stream) { | ||
| 85 | - recognizer->impl->DecodeStream(stream->impl.get()); | ||
| 86 | - } | ||
| 87 | - | ||
| 88 | - void __stdcall DecodeMultipleOfflineStreams( | ||
| 89 | - SherpaOnnxOfflineRecognizer* recognizer, | ||
| 90 | - SherpaOnnxOfflineStream** streams, int32_t n) { | ||
| 91 | - std::vector<sherpa_onnx::OfflineStream*> ss(n); | ||
| 92 | - for (int32_t i = 0; i != n; ++i) { | ||
| 93 | - ss[i] = streams[i]->impl.get(); | ||
| 94 | - } | ||
| 95 | - recognizer->impl->DecodeStreams(ss.data(), n); | ||
| 96 | - } | ||
| 97 | - | ||
| 98 | - SherpaOnnxOfflineRecognizerResult* __stdcall GetOfflineStreamResult( | ||
| 99 | - SherpaOnnxOfflineStream* stream) { | ||
| 100 | - sherpa_onnx::OfflineRecognitionResult result = | ||
| 101 | - stream->impl->GetResult(); | ||
| 102 | - const auto& text = result.text; | ||
| 103 | - auto r = new SherpaOnnxOfflineRecognizerResult; | ||
| 104 | - r->text = new char[text.size() + 1]; | ||
| 105 | - std::copy(text.begin(), text.end(), const_cast<char*>(r->text)); | ||
| 106 | - const_cast<char*>(r->text)[text.size()] = 0; | ||
| 107 | - r->text_len = text.size(); | ||
| 108 | - return r; | ||
| 109 | - } | ||
| 110 | - | ||
| 111 | - | ||
| 112 | - /// Free a pointer returned by CreateOfflineRecognizer() | ||
| 113 | - /// | ||
| 114 | - /// @param recognizer A pointer returned by CreateOfflineRecognizer() | ||
| 115 | - void __stdcall DestroyOfflineRecognizer( | ||
| 116 | - SherpaOnnxOfflineRecognizer* recognizer) { | ||
| 117 | - delete recognizer->impl; | ||
| 118 | - delete recognizer; | ||
| 119 | - } | ||
| 120 | - | ||
| 121 | - /// Destroy an offline stream. | ||
| 122 | - /// | ||
| 123 | - /// @param stream A pointer returned by CreateOfflineStream() | ||
| 124 | - void __stdcall DestroyOfflineStream(SherpaOnnxOfflineStream* stream) { | ||
| 125 | - delete stream; | ||
| 126 | - } | ||
| 127 | - | ||
| 128 | - /// Destroy the pointer returned by GetOfflineStreamResult(). | ||
| 129 | - /// | ||
| 130 | - /// @param r A pointer returned by GetOfflineStreamResult() | ||
| 131 | - void __stdcall DestroyOfflineRecognizerResult( | ||
| 132 | - SherpaOnnxOfflineRecognizerResult* r) { | ||
| 133 | - delete r->text; | ||
| 134 | - delete r; | ||
| 135 | - } | ||
| 136 | -}// namespace sherpa_onnx |
sherpa-onnx/csharp-api/offline-api.h
已删除
100644 → 0
| 1 | -// sherpa-onnx/csharp-api/offline-api.h | ||
| 2 | -// | ||
| 3 | -// Copyright (c) 2023 Manyeyes Corporation | ||
| 4 | - | ||
| 5 | -#pragma once | ||
| 6 | - | ||
| 7 | -#include <list> | ||
| 8 | - | ||
| 9 | -namespace sherpa_onnx | ||
| 10 | -{ | ||
| 11 | - /// Please refer to | ||
| 12 | - /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 13 | - /// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 14 | - /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 15 | - /// from there. | ||
| 16 | - typedef struct SherpaOnnxOfflineTransducer { | ||
| 17 | - const char* encoder_filename; | ||
| 18 | - const char* decoder_filename; | ||
| 19 | - const char* joiner_filename; | ||
| 20 | - } SherpaOnnxOfflineTransducer; | ||
| 21 | - | ||
| 22 | - typedef struct SherpaOnnxOfflineParaformer { | ||
| 23 | - const char* model; | ||
| 24 | - }SherpaOnnxOfflineParaformer; | ||
| 25 | - | ||
| 26 | - typedef struct SherpaOnnxOfflineNemoEncDecCtc { | ||
| 27 | - const char* model; | ||
| 28 | - }SherpaOnnxOfflineNemoEncDecCtc; | ||
| 29 | - | ||
| 30 | - | ||
| 31 | - typedef struct SherpaOnnxOfflineModelConfig { | ||
| 32 | - SherpaOnnxOfflineTransducer transducer; | ||
| 33 | - SherpaOnnxOfflineParaformer paraformer; | ||
| 34 | - SherpaOnnxOfflineNemoEncDecCtc nemo_ctc; | ||
| 35 | - const char* tokens; | ||
| 36 | - const int32_t num_threads; | ||
| 37 | - const bool debug; | ||
| 38 | - } SherpaOnnxOfflineModelConfig; | ||
| 39 | - | ||
| 40 | - /// It expects 16 kHz 16-bit single channel wave format. | ||
| 41 | - typedef struct SherpaOnnxFeatureConfig { | ||
| 42 | - /// Sample rate of the input data. MUST match the one expected | ||
| 43 | - /// by the model. For instance, it should be 16000 for models provided | ||
| 44 | - /// by us. | ||
| 45 | - int32_t sample_rate; | ||
| 46 | - | ||
| 47 | - /// Feature dimension of the model. | ||
| 48 | - /// For instance, it should be 80 for models provided by us. | ||
| 49 | - int32_t feature_dim; | ||
| 50 | - } SherpaOnnxFeatureConfig; | ||
| 51 | - | ||
| 52 | - typedef struct SherpaOnnxOfflineRecognizerConfig { | ||
| 53 | - SherpaOnnxFeatureConfig feat_config; | ||
| 54 | - SherpaOnnxOfflineModelConfig model_config; | ||
| 55 | - | ||
| 56 | - /// Possible values are: greedy_search, modified_beam_search | ||
| 57 | - const char* decoding_method; | ||
| 58 | - | ||
| 59 | - } SherpaOnnxOfflineRecognizerConfig; | ||
| 60 | - | ||
| 61 | - typedef struct SherpaOnnxOfflineRecognizerResult { | ||
| 62 | - // Recognition results. | ||
| 63 | - // For English, it consists of space separated words. | ||
| 64 | - // For Chinese, it consists of Chinese words without spaces. | ||
| 65 | - char* text; | ||
| 66 | - int text_len; | ||
| 67 | - | ||
| 68 | - // Decoded results at the token level. | ||
| 69 | - // For instance, for BPE-based models it consists of a list of BPE tokens. | ||
| 70 | - // std::vector<std::string> tokens; | ||
| 71 | - | ||
| 72 | - // timestamps.size() == tokens.size() | ||
| 73 | - // timestamps[i] records the time in seconds when tokens[i] is decoded. | ||
| 74 | - // std::vector<float> timestamps; | ||
| 75 | - } SherpaOnnxOfflineRecognizerResult; | ||
| 76 | - | ||
| 77 | - /// Note: OfflineRecognizer here means NonStreamingRecognizer. | ||
| 78 | - /// It does not need to access the Internet during recognition. | ||
| 79 | - /// Everything is run locally. | ||
| 80 | - typedef struct SherpaOnnxOfflineRecognizer SherpaOnnxOfflineRecognizer; | ||
| 81 | - | ||
| 82 | - typedef struct SherpaOnnxOfflineStream SherpaOnnxOfflineStream; | ||
| 83 | - | ||
| 84 | - extern "C" __declspec(dllexport) | ||
| 85 | - SherpaOnnxOfflineRecognizer * __stdcall CreateOfflineRecognizer( | ||
| 86 | - const SherpaOnnxOfflineRecognizerConfig * config); | ||
| 87 | - | ||
| 88 | - extern "C" __declspec(dllexport) | ||
| 89 | - SherpaOnnxOfflineStream * __stdcall CreateOfflineStream( | ||
| 90 | - SherpaOnnxOfflineRecognizer * sherpaOnnxOfflineRecognizer); | ||
| 91 | - | ||
| 92 | - extern "C" __declspec(dllexport) | ||
| 93 | - void __stdcall AcceptWaveform( | ||
| 94 | - SherpaOnnxOfflineStream * stream, int32_t sample_rate, | ||
| 95 | - const float* samples, int32_t samples_size); | ||
| 96 | - | ||
| 97 | - extern "C" __declspec(dllexport) | ||
| 98 | - void __stdcall DecodeOfflineStream( | ||
| 99 | - SherpaOnnxOfflineRecognizer * recognizer, | ||
| 100 | - SherpaOnnxOfflineStream * stream); | ||
| 101 | - | ||
| 102 | - extern "C" __declspec(dllexport) | ||
| 103 | - void __stdcall DecodeMultipleOfflineStreams( | ||
| 104 | - SherpaOnnxOfflineRecognizer * recognizer, | ||
| 105 | - SherpaOnnxOfflineStream * *streams, int32_t n); | ||
| 106 | - | ||
| 107 | - extern "C" __declspec(dllexport) | ||
| 108 | - SherpaOnnxOfflineRecognizerResult * __stdcall GetOfflineStreamResult( | ||
| 109 | - SherpaOnnxOfflineStream * stream); | ||
| 110 | - | ||
| 111 | - extern "C" __declspec(dllexport) | ||
| 112 | - void __stdcall DestroyOfflineRecognizer( | ||
| 113 | - SherpaOnnxOfflineRecognizer * recognizer); | ||
| 114 | - | ||
| 115 | - extern "C" __declspec(dllexport) | ||
| 116 | - void __stdcall DestroyOfflineStream( | ||
| 117 | - SherpaOnnxOfflineStream * stream); | ||
| 118 | - | ||
| 119 | - extern "C" __declspec(dllexport) | ||
| 120 | - void __stdcall DestroyOfflineRecognizerResult( | ||
| 121 | - SherpaOnnxOfflineRecognizerResult * r); | ||
| 122 | -}// namespace sherpa_onnx |
sherpa-onnx/csharp-api/online-api.cc
已删除
100644 → 0
| 1 | -// sherpa-onnx/csharp-api/online-api.cc | ||
| 2 | -// | ||
| 3 | -// Copyright (c) 2023 Xiaomi Corporation | ||
| 4 | - | ||
| 5 | -#include "online-api.h" | ||
| 6 | - | ||
| 7 | -#include <algorithm> | ||
| 8 | -#include <memory> | ||
| 9 | -#include <utility> | ||
| 10 | -#include <vector> | ||
| 11 | - | ||
| 12 | -#include "../../sherpa-onnx/csrc/display.h" | ||
| 13 | -#include "../../sherpa-onnx/csrc/online-recognizer.h" | ||
| 14 | -namespace sherpa_onnx | ||
| 15 | -{ | ||
| 16 | - struct SherpaOnnxOnlineRecognizer { | ||
| 17 | - sherpa_onnx::OnlineRecognizer* impl; | ||
| 18 | - }; | ||
| 19 | - | ||
| 20 | - struct SherpaOnnxOnlineStream { | ||
| 21 | - std::unique_ptr<sherpa_onnx::OnlineStream> impl; | ||
| 22 | - explicit SherpaOnnxOnlineStream(std::unique_ptr<sherpa_onnx::OnlineStream> p) | ||
| 23 | - : impl(std::move(p)) {} | ||
| 24 | - }; | ||
| 25 | - | ||
| 26 | - struct SherpaOnnxDisplay { | ||
| 27 | - std::unique_ptr<sherpa_onnx::Display> impl; | ||
| 28 | - }; | ||
| 29 | - | ||
| 30 | - SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer( | ||
| 31 | - const SherpaOnnxOnlineRecognizerConfig* config) { | ||
| 32 | - sherpa_onnx::OnlineRecognizerConfig recognizer_config; | ||
| 33 | - | ||
| 34 | - recognizer_config.feat_config.sampling_rate = config->feat_config.sample_rate; | ||
| 35 | - recognizer_config.feat_config.feature_dim = config->feat_config.feature_dim; | ||
| 36 | - | ||
| 37 | - recognizer_config.model_config.encoder_filename = | ||
| 38 | - config->model_config.transducer.encoder; | ||
| 39 | - recognizer_config.model_config.decoder_filename = | ||
| 40 | - config->model_config.transducer.decoder; | ||
| 41 | - recognizer_config.model_config.joiner_filename = config->model_config.transducer.joiner; | ||
| 42 | - recognizer_config.model_config.tokens = config->model_config.tokens; | ||
| 43 | - recognizer_config.model_config.num_threads = config->model_config.num_threads; | ||
| 44 | - recognizer_config.model_config.debug = config->model_config.debug; | ||
| 45 | - | ||
| 46 | - recognizer_config.decoding_method = config->decoding_method; | ||
| 47 | - recognizer_config.max_active_paths = config->max_active_paths; | ||
| 48 | - | ||
| 49 | - recognizer_config.enable_endpoint = config->enable_endpoint; | ||
| 50 | - | ||
| 51 | - recognizer_config.endpoint_config.rule1.min_trailing_silence = | ||
| 52 | - config->rule1_min_trailing_silence; | ||
| 53 | - | ||
| 54 | - recognizer_config.endpoint_config.rule2.min_trailing_silence = | ||
| 55 | - config->rule2_min_trailing_silence; | ||
| 56 | - | ||
| 57 | - recognizer_config.endpoint_config.rule3.min_utterance_length = | ||
| 58 | - config->rule3_min_utterance_length; | ||
| 59 | - | ||
| 60 | - SherpaOnnxOnlineRecognizer* recognizer = new SherpaOnnxOnlineRecognizer; | ||
| 61 | - recognizer->impl = new sherpa_onnx::OnlineRecognizer(recognizer_config); | ||
| 62 | - | ||
| 63 | - return recognizer; | ||
| 64 | - } | ||
| 65 | - | ||
| 66 | - void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer) { | ||
| 67 | - delete recognizer->impl; | ||
| 68 | - delete recognizer; | ||
| 69 | - } | ||
| 70 | - | ||
| 71 | - SherpaOnnxOnlineStream* __stdcall CreateOnlineStream( | ||
| 72 | - const SherpaOnnxOnlineRecognizer* recognizer) { | ||
| 73 | - SherpaOnnxOnlineStream* stream = | ||
| 74 | - new SherpaOnnxOnlineStream(recognizer->impl->CreateStream()); | ||
| 75 | - return stream; | ||
| 76 | - } | ||
| 77 | - | ||
| 78 | - void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream) { delete stream; } | ||
| 79 | - | ||
| 80 | - void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate, | ||
| 81 | - const float* samples, int32_t n) { | ||
| 82 | - stream->impl->AcceptWaveform(sample_rate, samples, n); | ||
| 83 | - } | ||
| 84 | - | ||
| 85 | - int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 86 | - SherpaOnnxOnlineStream* stream) { | ||
| 87 | - return recognizer->impl->IsReady(stream->impl.get()); | ||
| 88 | - } | ||
| 89 | - | ||
| 90 | - void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 91 | - SherpaOnnxOnlineStream* stream) { | ||
| 92 | - recognizer->impl->DecodeStream(stream->impl.get()); | ||
| 93 | - } | ||
| 94 | - | ||
| 95 | - void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 96 | - SherpaOnnxOnlineStream** streams, int32_t n) { | ||
| 97 | - std::vector<sherpa_onnx::OnlineStream*> ss(n); | ||
| 98 | - for (int32_t i = 0; i != n; ++i) { | ||
| 99 | - ss[i] = streams[i]->impl.get(); | ||
| 100 | - } | ||
| 101 | - recognizer->impl->DecodeStreams(ss.data(), n); | ||
| 102 | - } | ||
| 103 | - | ||
| 104 | - SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult( | ||
| 105 | - SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream) { | ||
| 106 | - sherpa_onnx::OnlineRecognizerResult result = | ||
| 107 | - recognizer->impl->GetResult(stream->impl.get()); | ||
| 108 | - const auto& text = result.text; | ||
| 109 | - | ||
| 110 | - auto r = new SherpaOnnxOnlineRecognizerResult; | ||
| 111 | - r->text = new char[text.size() + 1]; | ||
| 112 | - std::copy(text.begin(), text.end(), const_cast<char*>(r->text)); | ||
| 113 | - const_cast<char*>(r->text)[text.size()] = 0; | ||
| 114 | - r->text_len = text.size(); | ||
| 115 | - return r; | ||
| 116 | - } | ||
| 117 | - | ||
| 118 | - void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r) { | ||
| 119 | - delete[] r->text; | ||
| 120 | - delete r; | ||
| 121 | - } | ||
| 122 | - | ||
| 123 | - void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 124 | - SherpaOnnxOnlineStream* stream) { | ||
| 125 | - recognizer->impl->Reset(stream->impl.get()); | ||
| 126 | - } | ||
| 127 | - | ||
| 128 | - void __stdcall InputFinished(SherpaOnnxOnlineStream* stream) { | ||
| 129 | - stream->impl->InputFinished(); | ||
| 130 | - } | ||
| 131 | - | ||
| 132 | - int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 133 | - SherpaOnnxOnlineStream* stream) { | ||
| 134 | - return recognizer->impl->IsEndpoint(stream->impl.get()); | ||
| 135 | - } | ||
| 136 | - | ||
| 137 | - SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line) { | ||
| 138 | - SherpaOnnxDisplay* ans = new SherpaOnnxDisplay; | ||
| 139 | - ans->impl = std::make_unique<sherpa_onnx::Display>(max_word_per_line); | ||
| 140 | - return ans; | ||
| 141 | - } | ||
| 142 | - | ||
| 143 | - void __stdcall DestroyDisplay(SherpaOnnxDisplay* display) { delete display; } | ||
| 144 | - | ||
| 145 | - void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s) { | ||
| 146 | - display->impl->Print(idx, s); | ||
| 147 | - } | ||
| 148 | -} |
sherpa-onnx/csharp-api/online-api.h
已删除
100644 → 0
| 1 | -// sherpa-onnx/csharp-api/online-api.h | ||
| 2 | -// | ||
| 3 | -// Copyright (c) 2023 Xiaomi Corporation | ||
| 4 | - | ||
| 5 | -// C API for sherpa-onnx | ||
| 6 | -// | ||
| 7 | -// Please refer to | ||
| 8 | -// https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c | ||
| 9 | -// for usages. | ||
| 10 | -// | ||
| 11 | - | ||
| 12 | -#ifndef SHERPA_ONNX_CPP_API_C_API_H_ | ||
| 13 | -#define SHERPA_ONNX_CPP_API_C_API_H_ | ||
| 14 | - | ||
| 15 | -#include <stdint.h> | ||
| 16 | - | ||
| 17 | -#ifdef __cplusplus | ||
| 18 | -extern "C" { | ||
| 19 | -#endif | ||
| 20 | - namespace sherpa_onnx | ||
| 21 | - { | ||
| 22 | - /// Please refer to | ||
| 23 | - /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html | ||
| 24 | - /// to download pre-trained models. That is, you can find encoder-xxx.onnx | ||
| 25 | - /// decoder-xxx.onnx, joiner-xxx.onnx, and tokens.txt for this struct | ||
| 26 | - /// from there. | ||
| 27 | - typedef struct SherpaOnnxOnlineTransducer { | ||
| 28 | - const char* encoder; | ||
| 29 | - const char* decoder; | ||
| 30 | - const char* joiner; | ||
| 31 | - } SherpaOnnxOnlineTransducer; | ||
| 32 | - | ||
| 33 | - typedef struct SherpaOnnxOnlineModelConfig | ||
| 34 | - { | ||
| 35 | - const SherpaOnnxOnlineTransducer transducer; | ||
| 36 | - const char* tokens; | ||
| 37 | - const int32_t num_threads; | ||
| 38 | - const bool debug; // true to print debug information of the model | ||
| 39 | - }SherpaOnnxOnlineModelConfig; | ||
| 40 | - | ||
| 41 | - /// It expects 16 kHz 16-bit single channel wave format. | ||
| 42 | - typedef struct SherpaOnnxFeatureConfig { | ||
| 43 | - /// Sample rate of the input data. MUST match the one expected | ||
| 44 | - /// by the model. For instance, it should be 16000 for models provided | ||
| 45 | - /// by us. | ||
| 46 | - int32_t sample_rate; | ||
| 47 | - | ||
| 48 | - /// Feature dimension of the model. | ||
| 49 | - /// For instance, it should be 80 for models provided by us. | ||
| 50 | - int32_t feature_dim; | ||
| 51 | - } SherpaOnnxFeatureConfig; | ||
| 52 | - | ||
| 53 | - typedef struct SherpaOnnxOnlineRecognizerConfig { | ||
| 54 | - SherpaOnnxFeatureConfig feat_config; | ||
| 55 | - SherpaOnnxOnlineModelConfig model_config; | ||
| 56 | - | ||
| 57 | - /// Possible values are: greedy_search, modified_beam_search | ||
| 58 | - const char* decoding_method; | ||
| 59 | - | ||
| 60 | - /// Used only when decoding_method is modified_beam_search | ||
| 61 | - /// Example value: 4 | ||
| 62 | - int32_t max_active_paths; | ||
| 63 | - | ||
| 64 | - /// 0 to disable endpoint detection. | ||
| 65 | - /// A non-zero value to enable endpoint detection. | ||
| 66 | - int enable_endpoint; | ||
| 67 | - | ||
| 68 | - /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 69 | - /// this value even if nothing has been decoded. | ||
| 70 | - /// Used only when enable_endpoint is not 0. | ||
| 71 | - float rule1_min_trailing_silence; | ||
| 72 | - | ||
| 73 | - /// An endpoint is detected if trailing silence in seconds is larger than | ||
| 74 | - /// this value after something that is not blank has been decoded. | ||
| 75 | - /// Used only when enable_endpoint is not 0. | ||
| 76 | - float rule2_min_trailing_silence; | ||
| 77 | - | ||
| 78 | - /// An endpoint is detected if the utterance in seconds is larger than | ||
| 79 | - /// this value. | ||
| 80 | - /// Used only when enable_endpoint is not 0. | ||
| 81 | - float rule3_min_utterance_length; | ||
| 82 | - } SherpaOnnxOnlineRecognizerConfig; | ||
| 83 | - | ||
| 84 | - typedef struct SherpaOnnxOnlineRecognizerResult { | ||
| 85 | - const char* text; | ||
| 86 | - int text_len; | ||
| 87 | - // TODO(fangjun): Add more fields | ||
| 88 | - } SherpaOnnxOnlineRecognizerResult; | ||
| 89 | - | ||
| 90 | - /// Note: OnlineRecognizer here means StreamingRecognizer. | ||
| 91 | - /// It does not need to access the Internet during recognition. | ||
| 92 | - /// Everything is run locally. | ||
| 93 | - typedef struct SherpaOnnxOnlineRecognizer SherpaOnnxOnlineRecognizer; | ||
| 94 | - typedef struct SherpaOnnxOnlineStream SherpaOnnxOnlineStream; | ||
| 95 | - | ||
| 96 | - /// @param config Config for the recognizer. | ||
| 97 | - /// @return Return a pointer to the recognizer. The user has to invoke | ||
| 98 | - /// DestroyOnlineRecognizer() to free it to avoid memory leak. | ||
| 99 | - extern "C" __declspec(dllexport) | ||
| 100 | - SherpaOnnxOnlineRecognizer* __stdcall CreateOnlineRecognizer( | ||
| 101 | - const SherpaOnnxOnlineRecognizerConfig * config); | ||
| 102 | - | ||
| 103 | - /// Free a pointer returned by CreateOnlineRecognizer() | ||
| 104 | - /// | ||
| 105 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 106 | - extern "C" __declspec(dllexport) | ||
| 107 | - void __stdcall DestroyOnlineRecognizer(SherpaOnnxOnlineRecognizer* recognizer); | ||
| 108 | - | ||
| 109 | - /// Create an online stream for accepting wave samples. | ||
| 110 | - /// | ||
| 111 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 112 | - /// @return Return a pointer to an OnlineStream. The user has to invoke | ||
| 113 | - /// DestroyOnlineStream() to free it to avoid memory leak. | ||
| 114 | - extern "C" __declspec(dllexport) | ||
| 115 | - SherpaOnnxOnlineStream* __stdcall CreateOnlineStream( | ||
| 116 | - const SherpaOnnxOnlineRecognizer* recognizer); | ||
| 117 | - | ||
| 118 | - /// Destroy an online stream. | ||
| 119 | - /// | ||
| 120 | - /// @param stream A pointer returned by CreateOnlineStream() | ||
| 121 | - extern "C" __declspec(dllexport) | ||
| 122 | - void __stdcall DestroyOnlineStream(SherpaOnnxOnlineStream* stream); | ||
| 123 | - | ||
| 124 | - /// Accept input audio samples and compute the features. | ||
| 125 | - /// The user has to invoke DecodeOnlineStream() to run the neural network and | ||
| 126 | - /// decoding. | ||
| 127 | - /// | ||
| 128 | - /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 129 | - /// @param sample_rate Sample rate of the input samples. If it is different | ||
| 130 | - /// from config.feat_config.sample_rate, we will do | ||
| 131 | - /// resampling inside sherpa-onnx. | ||
| 132 | - /// @param samples A pointer to a 1-D array containing audio samples. | ||
| 133 | - /// The range of samples has to be normalized to [-1, 1]. | ||
| 134 | - /// @param n Number of elements in the samples array. | ||
| 135 | - extern "C" __declspec(dllexport) | ||
| 136 | - void __stdcall AcceptOnlineWaveform(SherpaOnnxOnlineStream* stream, int32_t sample_rate, | ||
| 137 | - const float* samples, int32_t n); | ||
| 138 | - | ||
| 139 | - /// Return 1 if there are enough feature frames for decoding. | ||
| 140 | - /// Return 0 otherwise. | ||
| 141 | - /// | ||
| 142 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer | ||
| 143 | - /// @param stream A pointer returned by CreateOnlineStream | ||
| 144 | - extern "C" __declspec(dllexport) | ||
| 145 | - int32_t __stdcall IsOnlineStreamReady(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 146 | - SherpaOnnxOnlineStream* stream); | ||
| 147 | - | ||
| 148 | - /// Call this function to run the neural network model and decoding. | ||
| 149 | - // | ||
| 150 | - /// Precondition for this function: IsOnlineStreamReady() MUST return 1. | ||
| 151 | - /// | ||
| 152 | - /// Usage example: | ||
| 153 | - /// | ||
| 154 | - /// while (IsOnlineStreamReady(recognizer, stream)) { | ||
| 155 | - /// DecodeOnlineStream(recognizer, stream); | ||
| 156 | - /// } | ||
| 157 | - /// | ||
| 158 | - extern "C" __declspec(dllexport) | ||
| 159 | - void __stdcall DecodeOnlineStream(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 160 | - SherpaOnnxOnlineStream* stream); | ||
| 161 | - | ||
| 162 | - /// This function is similar to DecodeOnlineStream(). It decodes multiple | ||
| 163 | - /// OnlineStream in parallel. | ||
| 164 | - /// | ||
| 165 | - /// Caution: The caller has to ensure each OnlineStream is ready, i.e., | ||
| 166 | - /// IsOnlineStreamReady() for that stream should return 1. | ||
| 167 | - /// | ||
| 168 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 169 | - /// @param streams A pointer array containing pointers returned by | ||
| 170 | - /// CreateOnlineStream() | ||
| 171 | - /// @param n Number of elements in the given streams array. | ||
| 172 | - extern "C" __declspec(dllexport) | ||
| 173 | - void __stdcall DecodeMultipleOnlineStreams(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 174 | - SherpaOnnxOnlineStream** streams, int32_t n); | ||
| 175 | - | ||
| 176 | - /// Get the decoding results so far for an OnlineStream. | ||
| 177 | - /// | ||
| 178 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 179 | - /// @param stream A pointer returned by CreateOnlineStream(). | ||
| 180 | - /// @return A pointer containing the result. The user has to invoke | ||
| 181 | - /// DestroyOnlineRecognizerResult() to free the returned pointer to | ||
| 182 | - /// avoid memory leak. | ||
| 183 | - extern "C" __declspec(dllexport) | ||
| 184 | - SherpaOnnxOnlineRecognizerResult* __stdcall GetOnlineStreamResult( | ||
| 185 | - SherpaOnnxOnlineRecognizer* recognizer, SherpaOnnxOnlineStream* stream); | ||
| 186 | - | ||
| 187 | - /// Destroy the pointer returned by GetOnlineStreamResult(). | ||
| 188 | - /// | ||
| 189 | - /// @param r A pointer returned by GetOnlineStreamResult() | ||
| 190 | - extern "C" __declspec(dllexport) | ||
| 191 | - void __stdcall DestroyOnlineRecognizerResult(const SherpaOnnxOnlineRecognizerResult* r); | ||
| 192 | - | ||
| 193 | - /// Reset an OnlineStream , which clears the neural network model state | ||
| 194 | - /// and the state for decoding. | ||
| 195 | - /// | ||
| 196 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer(). | ||
| 197 | - /// @param stream A pointer returned by CreateOnlineStream | ||
| 198 | - extern "C" __declspec(dllexport) | ||
| 199 | - void __stdcall Reset(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 200 | - SherpaOnnxOnlineStream* stream); | ||
| 201 | - | ||
| 202 | - /// Signal that no more audio samples would be available. | ||
| 203 | - /// After this call, you cannot call AcceptOnlineWaveform() any more. | ||
| 204 | - /// | ||
| 205 | - /// @param stream A pointer returned by CreateOnlineStream() | ||
| 206 | - extern "C" __declspec(dllexport) | ||
| 207 | - void __stdcall InputFinished(SherpaOnnxOnlineStream* stream); | ||
| 208 | - | ||
| 209 | - /// Return 1 if an endpoint has been detected. | ||
| 210 | - /// | ||
| 211 | - /// @param recognizer A pointer returned by CreateOnlineRecognizer() | ||
| 212 | - /// @param stream A pointer returned by CreateOnlineStream() | ||
| 213 | - /// @return Return 1 if an endpoint is detected. Return 0 otherwise. | ||
| 214 | - extern "C" __declspec(dllexport) | ||
| 215 | - int32_t __stdcall IsEndpoint(SherpaOnnxOnlineRecognizer* recognizer, | ||
| 216 | - SherpaOnnxOnlineStream* stream); | ||
| 217 | - | ||
| 218 | - // for displaying results on Linux/macOS. | ||
| 219 | - typedef struct SherpaOnnxDisplay SherpaOnnxDisplay; | ||
| 220 | - | ||
| 221 | - /// Create a display object. Must be freed using DestroyDisplay to avoid | ||
| 222 | - /// memory leak. | ||
| 223 | - extern "C" __declspec(dllexport) | ||
| 224 | - SherpaOnnxDisplay* __stdcall CreateDisplay(int32_t max_word_per_line); | ||
| 225 | - | ||
| 226 | - extern "C" __declspec(dllexport) | ||
| 227 | - void __stdcall DestroyDisplay(SherpaOnnxDisplay* display); | ||
| 228 | - | ||
| 229 | - /// Print the result. | ||
| 230 | - extern "C" __declspec(dllexport) | ||
| 231 | - void __stdcall SherpaOnnxPrint(SherpaOnnxDisplay* display, int32_t idx, const char* s); | ||
| 232 | - } | ||
| 233 | - | ||
| 234 | -#ifdef __cplusplus | ||
| 235 | -} /* extern "C" */ | ||
| 236 | -#endif | ||
| 237 | - | ||
| 238 | -#endif // SHERPA_ONNX_CPP_API_C_API_H_ |
-
请 注册 或 登录 后发表评论