Fangjun Kuang
Committed by GitHub

Support cross compiling for aarch64 (#52)

@@ -9,3 +9,4 @@ __pycache__ @@ -9,3 +9,4 @@ __pycache__
9 dist/ 9 dist/
10 sherpa_onnx.egg-info/ 10 sherpa_onnx.egg-info/
11 .DS_Store 11 .DS_Store
  12 +build-aarch64-linux-gnu
@@ -50,6 +50,12 @@ message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}") @@ -50,6 +50,12 @@ message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}")
50 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") 50 set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.")
51 set(CMAKE_CXX_EXTENSIONS OFF) 51 set(CMAKE_CXX_EXTENSIONS OFF)
52 52
  # Probe for the ALSA development header with the C++ compiler in use.
  # The result is stored in SHERPA_ONNX_HAS_ALSA; when the header is found,
  # SHERPA_ONNX_ENABLE_ALSA=1 is added as a directory-wide compile definition
  # so that the ALSA-specific sources (guarded by #ifdef) are compiled in.
  53 +include(CheckIncludeFileCXX)
  54 +check_include_file_cxx(alsa/asoundlib.h SHERPA_ONNX_HAS_ALSA)
  55 +if(SHERPA_ONNX_HAS_ALSA)
  56 + add_definitions(-DSHERPA_ONNX_ENABLE_ALSA=1)
  57 +endif()
  58 +
53 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) 59 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules)
54 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) 60 list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake)
55 61
#!/usr/bin/env bash
#
# Cross-compile sherpa-onnx, together with alsa-lib, for aarch64 Linux.
#
# Requirements: aarch64-linux-gnu-gcc on PATH, plus git, cmake and make.
# The script uses paths relative to the current directory ("..",
# "../toolchains/..."), so it is expected to be started from the root of the
# repository. All artifacts are placed in ./build-aarch64-linux-gnu.

if ! command -v aarch64-linux-gnu-gcc &> /dev/null; then
  echo "Please install a toolchain for cross-compiling."
  echo "You can refer to: "
  echo " https://k2-fsa.github.io/sherpa/onnx/install/aarch64-embedded-linux.html"
  echo "for help."
  exit 1
fi

# From here on: abort on the first failing command and echo every command.
set -ex

dir=build-aarch64-linux-gnu
mkdir -p "$dir"
cd "$dir"

# Build alsa-lib for the target only once; the shared library acts as the
# "already built" marker.
if [ ! -f alsa-lib/src/.libs/libasound.so ]; then
  echo "Start to cross-compile alsa-lib"
  if [ ! -d alsa-lib ]; then
    git clone --depth 1 https://github.com/alsa-project/alsa-lib
  fi
  # If it shows:
  #   ./gitcompile: line 79: libtoolize: command not found
  # Please use:
  #   sudo apt-get install libtool m4 automake
  #
  pushd alsa-lib
  CC=aarch64-linux-gnu-gcc ./gitcompile --host=aarch64-linux-gnu
  popd
  echo "Finish cross-compiling alsa-lib"
fi

# Let the C++ compiler find <alsa/asoundlib.h> from the cross-compiled tree.
# The previous value is appended only when it is non-empty: an empty element
# (e.g. a trailing ':') makes GCC search the current working directory.
export CPLUS_INCLUDE_PATH="$PWD/alsa-lib/include${CPLUS_INCLUDE_PATH:+:$CPLUS_INCLUDE_PATH}"

# Read by the CMake build (at configure time) to locate libasound.
export SHERPA_ONNX_ALSA_LIB_DIR="$PWD/alsa-lib/src/.libs"

cmake \
  -DCMAKE_INSTALL_PREFIX=./install \
  -DCMAKE_BUILD_TYPE=Release \
  -DBUILD_SHARED_LIBS=OFF \
  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
  ..

make VERBOSE=1 -j4
make install/strip

# Enable it only if needed
# cp -v "$SHERPA_ONNX_ALSA_LIB_DIR"/libasound.so* ./install/lib/
1 function(download_onnxruntime) 1 function(download_onnxruntime)
2 include(FetchContent) 2 include(FetchContent)
3 3
4 - if(UNIX AND NOT APPLE)  
5 - # If you don't have access to the Internet,  
6 - # please pre-download onnxruntime  
7 - set(possible_file_locations  
8 - $ENV{HOME}/Downloads/onnxruntime-linux-x64-1.14.0.tgz  
9 - ${PROJECT_SOURCE_DIR}/onnxruntime-linux-x64-1.14.0.tgz  
10 - ${PROJECT_BINARY_DIR}/onnxruntime-linux-x64-1.14.0.tgz  
11 - /tmp/onnxruntime-linux-x64-1.14.0.tgz  
12 - /star-fj/fangjun/download/github/onnxruntime-linux-x64-1.14.0.tgz  
13 - )  
14 -  
15 - set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-x64-1.14.0.tgz")  
16 - set(onnxruntime_HASH "SHA256=92bf534e5fa5820c8dffe9de2850f84ed2a1c063e47c659ce09e8c7938aa2090")  
17 - # After downloading, it contains:  
18 - # ./lib/libonnxruntime.so.1.14.0  
19 - # ./lib/libonnxruntime.so, which is a symlink to lib/libonnxruntime.so.1.14.0  
20 - #  
21 - # ./include  
22 - # It contains all the needed header files 4 + if(CMAKE_SYSTEM_NAME STREQUAL Linux)
  5 + if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64)
  6 + # For embedded systems
  7 + set(possible_file_locations
  8 + $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-1.14.0.tgz
  9 + ${PROJECT_SOURCE_DIR}/onnxruntime-linux-aarch64-1.14.0.tgz
  10 + ${PROJECT_BINARY_DIR}/onnxruntime-linux-aarch64-1.14.0.tgz
  11 + /tmp/onnxruntime-linux-aarch64-1.14.0.tgz
  12 + /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-1.14.0.tgz
  13 + )
  14 + set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-aarch64-1.14.0.tgz")
  15 + set(onnxruntime_HASH "SHA256=9384d2e6e29fed693a4630303902392eead0c41bee5705ccac6d6d34a3d5db86")
  16 +
  17 + else()
  18 + # If you don't have access to the Internet,
  19 + # please pre-download onnxruntime
  20 + set(possible_file_locations
  21 + $ENV{HOME}/Downloads/onnxruntime-linux-x64-1.14.0.tgz
  22 + ${PROJECT_SOURCE_DIR}/onnxruntime-linux-x64-1.14.0.tgz
  23 + ${PROJECT_BINARY_DIR}/onnxruntime-linux-x64-1.14.0.tgz
  24 + /tmp/onnxruntime-linux-x64-1.14.0.tgz
  25 + /star-fj/fangjun/download/github/onnxruntime-linux-x64-1.14.0.tgz
  26 + )
  27 +
  28 + set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-x64-1.14.0.tgz")
  29 + set(onnxruntime_HASH "SHA256=92bf534e5fa5820c8dffe9de2850f84ed2a1c063e47c659ce09e8c7938aa2090")
  30 + # After downloading, it contains:
  31 + # ./lib/libonnxruntime.so.1.14.0
  32 + # ./lib/libonnxruntime.so, which is a symlink to lib/libonnxruntime.so.1.14.0
  33 + #
  34 + # ./include
  35 + # It contains all the needed header files
  36 + endif()
23 elseif(APPLE) 37 elseif(APPLE)
24 # If you don't have access to the Internet, 38 # If you don't have access to the Internet,
25 # please pre-download onnxruntime 39 # please pre-download onnxruntime
@@ -11,6 +11,7 @@ add_library(sherpa-onnx-core @@ -11,6 +11,7 @@ add_library(sherpa-onnx-core
11 online-transducer-model.cc 11 online-transducer-model.cc
12 online-zipformer-transducer-model.cc 12 online-zipformer-transducer-model.cc
13 onnx-utils.cc 13 onnx-utils.cc
  14 + resample.cc
14 symbol-table.cc 15 symbol-table.cc
15 text-utils.cc 16 text-utils.cc
16 unbind.cc 17 unbind.cc
@@ -32,6 +33,18 @@ endif() @@ -32,6 +33,18 @@ endif()
32 install(TARGETS sherpa-onnx-core DESTINATION lib) 33 install(TARGETS sherpa-onnx-core DESTINATION lib)
33 install(TARGETS sherpa-onnx DESTINATION bin) 34 install(TARGETS sherpa-onnx DESTINATION bin)
34 35
  # Build and install the ALSA microphone executable only when the ALSA
  # header was detected (SHERPA_ONNX_HAS_ALSA is true).
  36 +if(SHERPA_ONNX_HAS_ALSA)
  37 + add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
  38 + target_link_libraries(sherpa-onnx-alsa PRIVATE sherpa-onnx-core)
  39 +
  # If the environment variable SHERPA_ONNX_ALSA_LIB_DIR is set when CMake
  # is configured, add that directory to the linker search path and link
  # libasound from it; otherwise link the toolchain's default libasound.
  # NOTE(review): the environment is only consulted at configure time, and a
  # directory containing spaces or semicolons is not handled by the raw -L flag.
  40 + if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
  41 + target_link_libraries(sherpa-onnx-alsa PRIVATE -L$ENV{SHERPA_ONNX_ALSA_LIB_DIR} -lasound)
  42 + else()
  43 + target_link_libraries(sherpa-onnx-alsa PRIVATE asound)
  44 + endif()
  45 + install(TARGETS sherpa-onnx-alsa DESTINATION bin)
  46 +endif()
  47 +
35 if(SHERPA_ONNX_ENABLE_TESTS) 48 if(SHERPA_ONNX_ENABLE_TESTS)
36 set(sherpa_onnx_test_srcs 49 set(sherpa_onnx_test_srcs
37 cat-test.cc 50 cat-test.cc
  1 +// sherpa-onnx/csrc/alsa.cc
  2 +//
  3 +// Copyright (c) 2022-2023 Xiaomi Corporation
  4 +
  5 +#ifdef SHERPA_ONNX_ENABLE_ALSA
  6 +
  7 +#include "sherpa-onnx/csrc/alsa.h"
  8 +
  9 +#include <algorithm>
  10 +
  11 +#include "alsa/asoundlib.h"
  12 +
  13 +namespace sherpa_onnx {
  14 +
// Convert interleaved 16-bit PCM to normalized float samples in [-1, 1),
// keeping only the first channel of every frame.
//
// @param in            Interleaved samples; frame k occupies
//                      in[k * num_channels] .. in[(k + 1) * num_channels - 1].
// @param num_channels  Number of channels per frame. Must be positive.
// @param out           On return it holds in.size() / num_channels samples.
//                      A trailing incomplete frame in `in` is ignored.
void ToFloat(const std::vector<int16_t> &in, int32_t num_channels,
             std::vector<float> *out) {
  const size_t num_frames = in.size() / num_channels;
  out->resize(num_frames);

  // Iterate over complete frames only, so that we never write past the end
  // of `out` when in.size() is not a multiple of num_channels.
  for (size_t k = 0; k != num_frames; ++k) {
    (*out)[k] = in[k * num_channels] / 32768.;
  }
}
  24 +
  // Opens the PCM device `device_name` for capture and configures it for
  // interleaved, signed 16-bit little-endian samples. Mono is requested first;
  // if the device rejects it, two channels are requested instead and
  // actual_channel_count_ is set to 2. The device is asked for a rate near
  // expected_sample_rate_; when the granted rate differs, a LinearResample is
  // created to convert from the actual rate to the expected one.
  //
  // Every failure prints a message to stderr and terminates the process via
  // exit(-1).
  25 +Alsa::Alsa(const char *device_name) {
  26 + const char *kDeviceHelp = R"(
  27 +Please use the command:
  28 +
  29 + arecord -l
  30 +
  31 +to list all available devices. For instance, if the output is:
  32 +
  33 +**** List of CAPTURE Hardware Devices ****
  34 +card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio]
  35 + Subdevices: 1/1
  36 + Subdevice #0: subdevice #0
  37 +
  38 +and if you want to select card 3 and the device 0 on that card, please use:
  39 +
  40 + hw:3,0
  41 +
  42 + )";
  43 +
  44 + int32_t err =
  45 + snd_pcm_open(&capture_handle_, device_name, SND_PCM_STREAM_CAPTURE, 0);
  46 + if (err) {
  47 + fprintf(stderr, "Unable to open: %s. %s\n", device_name, snd_strerror(err));
  48 + fprintf(stderr, "%s\n", kDeviceHelp);
  49 + exit(-1);
  50 + }
  51 +
  // hw_params is allocated by the alloca-style macro, so it is not freed
  // explicitly anywhere in this function.
  52 + snd_pcm_hw_params_t *hw_params;
  53 + snd_pcm_hw_params_alloca(&hw_params);
  54 +
  55 + err = snd_pcm_hw_params_any(capture_handle_, hw_params);
  56 + if (err) {
  57 + fprintf(stderr, "Failed to initialize hw_params: %s\n", snd_strerror(err));
  58 + exit(-1);
  59 + }
  60 +
  61 + err = snd_pcm_hw_params_set_access(capture_handle_, hw_params,
  62 + SND_PCM_ACCESS_RW_INTERLEAVED);
  63 + if (err) {
  64 + fprintf(stderr, "Failed to set access type: %s\n", snd_strerror(err));
  65 + exit(-1);
  66 + }
  67 +
  68 + err = snd_pcm_hw_params_set_format(capture_handle_, hw_params,
  69 + SND_PCM_FORMAT_S16_LE);
  70 + if (err) {
  71 + fprintf(stderr, "Failed to set format: %s\n", snd_strerror(err));
  72 + exit(-1);
  73 + }
  74 +
  75 + // mono
  76 + err = snd_pcm_hw_params_set_channels(capture_handle_, hw_params, 1);
  77 + if (err) {
  78 + fprintf(stderr, "Failed to set number of channels to 1. %s\n",
  79 + snd_strerror(err));
  80 +
  // Fall back to stereo; only the first channel is used later on.
  81 + err = snd_pcm_hw_params_set_channels(capture_handle_, hw_params, 2);
  82 + if (err) {
  83 + fprintf(stderr, "Failed to set number of channels to 2. %s\n",
  84 + snd_strerror(err));
  85 +
  86 + exit(-1);
  87 + }
  88 + actual_channel_count_ = 2;
  89 + fprintf(stderr,
  90 + "Channel count is set to 2. Will use only 1 channel of it.\n");
  91 + }
  92 +
  // In/out argument: holds the requested rate on entry and the rate chosen
  // by the device on return.
  93 + uint32_t actual_sample_rate = expected_sample_rate_;
  94 +
  95 + int32_t dir = 0;
  96 + err = snd_pcm_hw_params_set_rate_near(capture_handle_, hw_params,
  97 + &actual_sample_rate, &dir);
  98 + if (err) {
  99 + fprintf(stderr, "Failed to set sample rate to, %d: %s\n",
  100 + expected_sample_rate_, snd_strerror(err));
  101 + exit(-1);
  102 + }
  103 + actual_sample_rate_ = actual_sample_rate;
  104 +
  105 + if (actual_sample_rate_ != expected_sample_rate_) {
  106 + fprintf(stderr, "Failed to set sample rate to %d\n", expected_sample_rate_);
  107 + fprintf(stderr, "Current sample rate is %d\n", actual_sample_rate_);
  108 + fprintf(stderr,
  109 + "Creating a resampler:\n"
  110 + " in_sample_rate: %d\n"
  111 + " output_sample_rate: %d\n",
  112 + actual_sample_rate_, expected_sample_rate_);
  113 +
  // The cutoff is placed slightly below half of the lower of the two rates.
  114 + float min_freq = std::min(actual_sample_rate_, expected_sample_rate_);
  115 + float lowpass_cutoff = 0.99 * 0.5 * min_freq;
  116 +
  117 + int32_t lowpass_filter_width = 6;
  118 + resampler_ = std::make_unique<LinearResample>(
  119 + actual_sample_rate_, expected_sample_rate_, lowpass_cutoff,
  120 + lowpass_filter_width);
  121 + } else {
  122 + fprintf(stderr, "Current sample rate: %d\n", actual_sample_rate_);
  123 + }
  124 +
  // Apply the collected hardware parameters to the device.
  125 + err = snd_pcm_hw_params(capture_handle_, hw_params);
  126 + if (err) {
  127 + fprintf(stderr, "Failed to set hw params: %s\n", snd_strerror(err));
  128 + exit(-1);
  129 + }
  130 +
  131 + err = snd_pcm_prepare(capture_handle_);
  132 + if (err) {
  133 + fprintf(stderr, "Failed to prepare for recording: %s\n", snd_strerror(err));
  134 + exit(-1);
  135 + }
  136 +
  137 + fprintf(stderr, "Recording started!\n");
  138 +}
  139 +
  // Closes the capture handle that was opened in the constructor.
  140 +Alsa::~Alsa() { snd_pcm_close(capture_handle_); }
  141 +
  142 +const std::vector<float> &Alsa::Read(int32_t num_samples) {
  143 + samples_.resize(num_samples * actual_channel_count_);
  144 +
  145 + // count is in frames. Each frame contains actual_channel_count_ samples
  146 + int32_t count = snd_pcm_readi(capture_handle_, samples_.data(), num_samples);
  147 +
  148 + samples_.resize(count * actual_channel_count_);
  149 +
  150 + ToFloat(samples_, actual_channel_count_, &samples1_);
  151 +
  152 + if (!resampler_) {
  153 + return samples1_;
  154 + }
  155 +
  156 + resampler_->Resample(samples1_.data(), samples_.size(), false, &samples2_);
  157 + return samples2_;
  158 +}
  159 +
  160 +} // namespace sherpa_onnx
  161 +
  162 +#endif
  1 +// sherpa-onnx/csrc/alsa.h
  2 +//
  3 +// Copyright (c) 2022-2023 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_ALSA_H_
  6 +#define SHERPA_ONNX_CSRC_ALSA_H_
  7 +
  8 +#include <memory>
  9 +#include <vector>
  10 +
  11 +#include "alsa/asoundlib.h"
  12 +#include "sherpa-onnx/csrc/resample.h"
  13 +
  14 +namespace sherpa_onnx {
  15 +
  // Captures audio from an ALSA PCM device and returns it as float samples
  // at the expected sample rate, resampling when the device runs at a
  // different rate.
  16 +class Alsa {
  17 + public:
  // @param device_name Name of the capture device passed to snd_pcm_open().
  18 + explicit Alsa(const char *device_name);
  19 + ~Alsa();
  20 +
  21 + // This is a blocking read.
  22 + //
  23 + // @param num_samples Number of samples to read.
  24 + //
  25 + // The returned value is valid until the next call to Read().
  26 + const std::vector<float> &Read(int32_t num_samples);
  27 +
  // Sample rate of the samples returned by Read().
  28 + int32_t GetExpectedSampleRate() const { return expected_sample_rate_; }
  // Sample rate the device was actually configured with.
  29 + int32_t GetActualSampleRate() const { return actual_sample_rate_; }
  30 +
  31 + private:
  // Handle of the opened capture device; assigned in the constructor.
  32 + snd_pcm_t *capture_handle_;
  33 + int32_t expected_sample_rate_ = 16000;
  // Assigned in the constructor; it has no default value here.
  34 + int32_t actual_sample_rate_;
  35 +
  // 1 for mono, 2 if the constructor had to fall back to stereo.
  36 + int32_t actual_channel_count_ = 1;
  37 +
  // Non-null only when actual and expected sample rates differ.
  38 + std::unique_ptr<LinearResample> resampler_;
  39 + std::vector<int16_t> samples_; // directly from the microphone
  40 + std::vector<float> samples1_; // normalized version of samples_
  41 + std::vector<float> samples2_; // possibly resampled from samples1_
  42 +};
  43 +
  44 +} // namespace sherpa_onnx
  45 +
  46 +#endif // SHERPA_ONNX_CSRC_ALSA_H_
  1 +// sherpa-onnx/csrc/display.h
  2 +//
  3 +// Copyright (c) 2022-2023 Xiaomi Corporation
  4 +
  5 +#ifndef SHERPA_ONNX_CSRC_DISPLAY_H_
  6 +#define SHERPA_ONNX_CSRC_DISPLAY_H_
  7 +#include <stdio.h>
  8 +
  9 +#include <string>
  10 +
  11 +namespace sherpa_onnx {
  12 +
// Prints recognition results to stderr. The text of the segment that is
// currently being updated is redrawn in place using ANSI escape sequences;
// a new segment starts on a new line.
class Display {
 public:
  // Show `s` as the current text of segment `segment_id`.
  //
  // @param segment_id  Identifier of the segment. Calling Print() again with
  //                    the same id replaces the previously printed text.
  // @param s           UTF-8 encoded text to display.
  void Print(int32_t segment_id, const std::string &s) {
#ifdef _MSC_VER
    fprintf(stderr, "%d:%s\n", segment_id, s.c_str());
    return;
#endif
    if (last_segment_ == segment_id) {
      Clear();
    } else {
      if (last_segment_ != -1) {
        fprintf(stderr, "\n\r");
      }
      last_segment_ = segment_id;
      num_previous_lines_ = 0;
    }

    fprintf(stderr, "\r%d:", segment_id);

    // Number of characters printed on the current line.
    int32_t i = 0;
    for (size_t n = 0; n < s.size();) {
      // Print one whole character at a time. The length is clamped so that a
      // truncated multi-byte sequence at the end of `s` never reads past it.
      size_t len = Utf8Length(s[n]);
      if (len > s.size() - n) {
        len = s.size() - n;
      }
      fwrite(s.data() + n, 1, len, stderr);
      n += len;

      ++i;
      // Wrap only at a space or in front of a non-ASCII character. The test
      // goes through unsigned char because plain char is unsigned on some
      // targets (e.g. aarch64 Linux), where "s[n] < 0" is never true.
      if (i >= max_word_per_line_ && n + 1 < s.size() &&
          (s[n] == ' ' || !IsAscii(s[n]))) {
        fprintf(stderr, "\n\r ");
        ++num_previous_lines_;
        i = 0;
      }
    }
  }

 private:
  // True if `c` is a single-byte (7-bit) UTF-8 character.
  static bool IsAscii(char c) { return static_cast<unsigned char>(c) < 0x80; }

  // Number of bytes of the UTF-8 sequence introduced by `lead`. A stray
  // continuation byte or an invalid lead byte is treated as one byte so that
  // the caller always makes progress.
  static size_t Utf8Length(char lead) {
    const unsigned char c = static_cast<unsigned char>(lead);
    if (c < 0x80) return 1;
    if ((c & 0xE0) == 0xC0) return 2;
    if ((c & 0xF0) == 0xE0) return 3;
    if ((c & 0xF8) == 0xF0) return 4;
    return 1;
  }

  // Clear the output for the current segment
  void Clear() {
    ClearCurrentLine();
    while (num_previous_lines_ > 0) {
      GoUpOneLine();
      ClearCurrentLine();
      --num_previous_lines_;
    }
  }

  // Clear the current line
  void ClearCurrentLine() const { fprintf(stderr, "\33[2K\r"); }

  // Move the cursor to the previous line
  void GoUpOneLine() const { fprintf(stderr, "\033[1A\r"); }

 private:
  // Maximum number of characters printed before a line break is considered.
  int32_t max_word_per_line_ = 60;
  // Number of wrapped lines already printed for the current segment.
  int32_t num_previous_lines_ = 0;
  // Id of the segment printed most recently; -1 before the first Print().
  int32_t last_segment_ = -1;
};
  76 +
  77 +} // namespace sherpa_onnx
  78 +
  79 +#endif // SHERPA_ONNX_CSRC_DISPLAY_H_
  1 +/**
  2 + * Copyright 2013 Pegah Ghahremani
  3 + * 2014 IMSL, PKU-HKUST (author: Wei Shi)
  4 + * 2014 Yanqing Sun, Junjie Wang
  5 + * 2014 Johns Hopkins University (author: Daniel Povey)
  6 + * Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  7 + *
  8 + * See LICENSE for clarification regarding multiple authors
  9 + *
  10 + * Licensed under the Apache License, Version 2.0 (the "License");
  11 + * you may not use this file except in compliance with the License.
  12 + * You may obtain a copy of the License at
  13 + *
  14 + * http://www.apache.org/licenses/LICENSE-2.0
  15 + *
  16 + * Unless required by applicable law or agreed to in writing, software
  17 + * distributed under the License is distributed on an "AS IS" BASIS,
  18 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19 + * See the License for the specific language governing permissions and
  20 + * limitations under the License.
  21 + */
  22 +// this file is copied and modified from
  23 +// kaldi/src/feat/resample.cc
  24 +
  25 +#include "sherpa-onnx/csrc/resample.h"
  26 +
  27 +#include <assert.h>
  28 +#include <math.h>
  29 +#include <stdio.h>
  30 +
  31 +#include <cstdlib>
  32 +#include <type_traits>
  33 +
  34 +#ifndef M_2PI
  35 +#define M_2PI 6.283185307179586476925286766559005
  36 +#endif
  37 +
  38 +#ifndef M_PI
  39 +#define M_PI 3.1415926535897932384626433832795
  40 +#endif
  41 +
  42 +namespace sherpa_onnx {
  43 +
// Greatest common divisor of two integers, always returned as a
// non-negative value. If exactly one argument is zero, the absolute value of
// the other one is returned. If both are zero the GCD is undefined: an error
// is printed and the process exits.
//
// Adapted from kaldi/src/base/kaldi-math.h
template <class I>
I Gcd(I m, I n) {
  static_assert(std::is_integral<I>::value, "");

  if (m == 0 && n == 0) {
    // every integer divides zero, so there is no greatest one
    fprintf(stderr, "Undefined GCD since m = 0, n = 0.");
    exit(-1);
  }

  // Euclid's algorithm; the sign is normalized once at the end.
  while (n != 0) {
    const I remainder = m % n;
    m = n;
    n = remainder;
  }

  return m > 0 ? m : -m;
}
  65 +
/// Least common multiple of two integers.
///
/// Both arguments must be positive; this is checked with an assertion.
///
/// Adapted from kaldi/src/base/kaldi-math.h
template <class I>
I Lcm(I m, I n) {
  assert(m > 0 && n > 0);

  const I divisor = Gcd(m, n);
  const I m_reduced = m / divisor;
  const I n_reduced = n / divisor;

  return divisor * m_reduced * n_reduced;
}
  75 +
// Returns the sum of a[i] * b[i] over the first n elements, accumulated in
// single precision from the first element to the last.
static float DotProduct(const float *a, const float *b, int32_t n) {
  float acc = 0;
  const float *lhs = a;
  const float *rhs = b;
  for (int32_t remaining = n; remaining != 0; --remaining) {
    acc += (*lhs) * (*rhs);
    ++lhs;
    ++rhs;
  }
  return acc;
}
  83 +
  // Stores the sample rates, the low-pass cutoff and the number of zeros of
  // the filter, checks (via assert) that all of them are positive and that the
  // cutoff does not exceed half of either sample rate, then precomputes the
  // per-unit indexes and filter weights and resets the streaming state.
  84 +LinearResample::LinearResample(int32_t samp_rate_in_hz,
  85 + int32_t samp_rate_out_hz, float filter_cutoff_hz,
  86 + int32_t num_zeros)
  87 + : samp_rate_in_(samp_rate_in_hz),
  88 + samp_rate_out_(samp_rate_out_hz),
  89 + filter_cutoff_(filter_cutoff_hz),
  90 + num_zeros_(num_zeros) {
  91 + assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 &&
  92 + filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz &&
  93 + filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0);
  94 +
  95 + // base_freq is the frequency of the repeating unit, which is the gcd
  96 + // of the input frequencies.
  97 + int32_t base_freq = Gcd(samp_rate_in_, samp_rate_out_);
  98 + input_samples_in_unit_ = samp_rate_in_ / base_freq;
  99 + output_samples_in_unit_ = samp_rate_out_ / base_freq;
  100 +
  101 + SetIndexesAndWeights();
  102 + Reset();
  103 +}
  104 +
  // For every output sample position i inside one repeating unit, precompute
  //   first_index_[i]: index of the first input sample that contributes, and
  //   weights_[i]:     the filter weights of the contributing input samples.
  // Both are relative to the start of the unit; first_index_[i] may be
  // negative.
  105 +void LinearResample::SetIndexesAndWeights() {
  106 + first_index_.resize(output_samples_in_unit_);
  107 + weights_.resize(output_samples_in_unit_);
  108 +
  // Distance in seconds from the filter center to the edge of its support.
  109 + double window_width = num_zeros_ / (2.0 * filter_cutoff_);
  110 +
  111 + for (int32_t i = 0; i < output_samples_in_unit_; i++) {
  112 + double output_t = i / static_cast<double>(samp_rate_out_);
  113 + double min_t = output_t - window_width, max_t = output_t + window_width;
  114 + // we do ceil on the min and floor on the max, because if we did it
  115 + // the other way around we would unnecessarily include indexes just
  116 + // outside the window, with zero coefficients. It's possible
  117 + // if the arguments to the ceil and floor expressions are integers
  118 + // (e.g. if filter_cutoff_ has an exact ratio with the sample rates),
  119 + // that we unnecessarily include something with a zero coefficient,
  120 + // but this is only a slight efficiency issue.
  121 + int32_t min_input_index = ceil(min_t * samp_rate_in_),
  122 + max_input_index = floor(max_t * samp_rate_in_),
  123 + num_indices = max_input_index - min_input_index + 1;
  124 + first_index_[i] = min_input_index;
  125 + weights_[i].resize(num_indices);
  126 + for (int32_t j = 0; j < num_indices; j++) {
  127 + int32_t input_index = min_input_index + j;
  128 + double input_t = input_index / static_cast<double>(samp_rate_in_),
  129 + delta_t = input_t - output_t;
  130 + // sign of delta_t doesn't matter.
  // Note: FilterFunc() takes a float, so delta_t is narrowed here.
  131 + weights_[i][j] = FilterFunc(delta_t) / samp_rate_in_;
  132 + }
  133 + }
  134 +}
  135 +
  136 +/** Here, t is a time in seconds representing an offset from
  137 + the center of the windowed filter function, and FilterFunc(t)
  138 + returns the windowed filter function, described
  139 + in the header as h(t) = f(t)g(t), evaluated at t.
  140 +*/
  141 +float LinearResample::FilterFunc(float t) const {
  142 + float window, // raised-cosine (Hanning) window of width
  143 + // num_zeros_/2*filter_cutoff_
  144 + filter; // sinc filter function
  145 + if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_))
  146 + window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t));
  147 + else
  148 + window = 0.0; // outside support of window function
  // The sinc expression is 0/0 at t == 0, so that point is handled
  // separately.
  149 + if (t != 0)
  150 + filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t);
  151 + else
  152 + filter = 2 * filter_cutoff_; // limit of the function at t = 0
  153 + return filter * window;
  154 +}
  155 +
  156 +void LinearResample::Reset() {
  157 + input_sample_offset_ = 0;
  158 + output_sample_offset_ = 0;
  159 + input_remainder_.resize(0);
  160 +}
  161 +
  // Resamples one piece of the input signal.
  //
  // @param input      Pointer to input_dim input samples.
  // @param input_dim  Number of samples in `input`.
  // @param flush      If true, output everything that can be computed from the
  //                   input seen so far (samples past the end of the input
  //                   contribute zero) and reset the internal state. If false,
  //                   the tail of the input is remembered for the next call.
  // @param output     Resized to the number of newly produced output samples
  //                   and filled with them.
  162 +void LinearResample::Resample(const float *input, int32_t input_dim, bool flush,
  163 + std::vector<float> *output) {
  164 + int64_t tot_input_samp = input_sample_offset_ + input_dim,
  165 + tot_output_samp = GetNumOutputSamples(tot_input_samp, flush);
  166 +
  167 + assert(tot_output_samp >= output_sample_offset_);
  168 +
  169 + output->resize(tot_output_samp - output_sample_offset_);
  170 +
  171 + // samp_out is the index into the total output signal, not just the part
  172 + // of it we are producing here.
  173 + for (int64_t samp_out = output_sample_offset_; samp_out < tot_output_samp;
  174 + samp_out++) {
  175 + int64_t first_samp_in;
  176 + int32_t samp_out_wrapped;
  177 + GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped);
  178 + const std::vector<float> &weights = weights_[samp_out_wrapped];
  179 + // first_input_index is the first index into "input" that we have a weight
  180 + // for.
  181 + int32_t first_input_index =
  182 + static_cast<int32_t>(first_samp_in - input_sample_offset_);
  183 + float this_output;
  // Fast path: the whole filter window lies inside the current input.
  184 + if (first_input_index >= 0 &&
  185 + first_input_index + static_cast<int32_t>(weights.size()) <= input_dim) {
  186 + this_output =
  187 + DotProduct(input + first_input_index, weights.data(), weights.size());
  188 + } else { // Handle edge cases.
  189 + this_output = 0.0;
  190 + for (int32_t i = 0; i < static_cast<int32_t>(weights.size()); i++) {
  191 + float weight = weights[i];
  192 + int32_t input_index = first_input_index + i;
  // A negative index refers to a sample of a previous call, which is
  // looked up in input_remainder_ counting from its end.
  193 + if (input_index < 0 &&
  194 + static_cast<int32_t>(input_remainder_.size()) + input_index >= 0) {
  195 + this_output +=
  196 + weight * input_remainder_[input_remainder_.size() + input_index];
  197 + } else if (input_index >= 0 && input_index < input_dim) {
  198 + this_output += weight * input[input_index];
  199 + } else if (input_index >= input_dim) {
  200 + // We're past the end of the input and are adding zero; should only
  201 + // happen if the user specified flush == true, or else we would not
  202 + // be trying to output this sample.
  203 + assert(flush);
  204 + }
  205 + }
  206 + }
  207 + int32_t output_index =
  208 + static_cast<int32_t>(samp_out - output_sample_offset_);
  209 + (*output)[output_index] = this_output;
  210 + }
  211 +
  212 + if (flush) {
  213 + Reset(); // Reset the internal state.
  214 + } else {
  215 + SetRemainder(input, input_dim);
  216 + input_sample_offset_ = tot_input_samp;
  217 + output_sample_offset_ = tot_output_samp;
  218 + }
  219 +}
  220 +
  // Returns the total number of output samples that can be produced for a
  // signal of input_num_samp input samples. With flush == false the usable
  // interval is shortened by the filter's window width, because samples near
  // the end still depend on input that has not arrived yet.
  221 +int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp,
  222 + bool flush) const {
  223 + // For exact computation, we measure time in "ticks" of 1.0 / tick_freq,
  224 + // where tick_freq is the least common multiple of samp_rate_in_ and
  225 + // samp_rate_out_.
  226 + int32_t tick_freq = Lcm(samp_rate_in_, samp_rate_out_);
  227 + int32_t ticks_per_input_period = tick_freq / samp_rate_in_;
  228 +
  229 + // work out the number of ticks in the time interval
  230 + // [ 0, input_num_samp/samp_rate_in_ ).
  231 + int64_t interval_length_in_ticks = input_num_samp * ticks_per_input_period;
  232 + if (!flush) {
  233 + float window_width = num_zeros_ / (2.0 * filter_cutoff_);
  234 + // To count the window-width in ticks we take the floor. This
  235 + // is because since we're looking for the largest integer num-out-samp
  236 + // that fits in the interval, which is open on the right, a reduction
  237 + // in interval length of less than a tick will never make a difference.
  238 + // For example, the largest integer in the interval [ 0, 2 ) and the
  239 + // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one).
  240 + // So when we're subtracting the window-width we can ignore the fractional
  241 + // part.
  242 + int32_t window_width_ticks = floor(window_width * tick_freq);
  243 + // The time-period of the output that we can sample gets reduced
  244 + // by the window-width (which is actually the distance from the
  245 + // center to the edge of the windowing function) if we're not
  246 + // "flushing the output".
  247 + interval_length_in_ticks -= window_width_ticks;
  248 + }
  249 + if (interval_length_in_ticks <= 0) return 0;
  250 +
  251 + int32_t ticks_per_output_period = tick_freq / samp_rate_out_;
  252 + // Get the last output-sample in the closed interval, i.e. replacing [ ) with
  253 + // [ ]. Note: integer division rounds down. See
  254 + // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of
  255 + // the notation.
  256 + int64_t last_output_samp = interval_length_in_ticks / ticks_per_output_period;
  257 + // We need the last output-sample in the open interval, so if it takes us to
  258 + // the end of the interval exactly, subtract one.
  259 + if (last_output_samp * ticks_per_output_period == interval_length_in_ticks)
  260 + last_output_samp--;
  261 +
  262 + // First output-sample index is zero, so the number of output samples
  263 + // is the last output-sample plus one.
  264 + int64_t num_output_samp = last_output_samp + 1;
  265 + return num_output_samp;
  266 +}
  267 +
  268 +// inline
  269 +void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in,
  270 + int32_t *samp_out_wrapped) const {
  271 + // A unit is the smallest nonzero amount of time that is an exact
  272 + // multiple of the input and output sample periods. The unit index
  273 + // is the answer to "which numbered unit we are in".
  274 + int64_t unit_index = samp_out / output_samples_in_unit_;
  275 + // samp_out_wrapped is equal to samp_out % output_samples_in_unit_
  276 + *samp_out_wrapped =
  277 + static_cast<int32_t>(samp_out - unit_index * output_samples_in_unit_);
  278 + *first_samp_in =
  279 + first_index_[*samp_out_wrapped] + unit_index * input_samples_in_unit_;
  280 +}
  281 +
  // Stores the most recent input samples in input_remainder_ so that the next
  // call to Resample() can look back across the boundary between pieces.
  // The newest samples come from `input`; when `input` is shorter than the
  // remainder, the older positions are filled from the previous remainder.
  282 +void LinearResample::SetRemainder(const float *input, int32_t input_dim) {
  283 + std::vector<float> old_remainder(input_remainder_);
  284 + // max_remainder_needed is the width of the filter from side to side,
  285 + // measured in input samples. you might think it should be half that,
  286 + // but you have to consider that you might be wanting to output samples
  287 + // that are "in the past" relative to the beginning of the latest
  288 + // input... anyway, storing more remainder than needed is not harmful.
  289 + int32_t max_remainder_needed =
  290 + ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_);
  291 + input_remainder_.resize(max_remainder_needed);
  292 + for (int32_t index = -static_cast<int32_t>(input_remainder_.size());
  293 + index < 0; index++) {
  294 + // we interpret "index" as an offset from the end of "input" and
  295 + // from the end of input_remainder_.
  296 + int32_t input_index = index + input_dim;
  297 + if (input_index >= 0) {
  298 + input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
  299 + input[input_index];
  300 + } else if (input_index + static_cast<int32_t>(old_remainder.size()) >= 0) {
  301 + input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] =
  302 + old_remainder[input_index +
  303 + static_cast<int32_t>(old_remainder.size())];
  304 + // else leave it at zero.
  // NOTE(review): the element is only zero if resize() above created it;
  // an element that already existed keeps its previous value.
  305 + }
  306 + }
  307 +}
  308 +
  309 +} // namespace sherpa_onnx
  1 +/**
  2 + * Copyright 2013 Pegah Ghahremani
  3 + * 2014 IMSL, PKU-HKUST (author: Wei Shi)
  4 + * 2014 Yanqing Sun, Junjie Wang
  5 + * 2014 Johns Hopkins University (author: Daniel Povey)
  6 + * Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang)
  7 + *
  8 + * See LICENSE for clarification regarding multiple authors
  9 + *
  10 + * Licensed under the Apache License, Version 2.0 (the "License");
  11 + * you may not use this file except in compliance with the License.
  12 + * You may obtain a copy of the License at
  13 + *
  14 + * http://www.apache.org/licenses/LICENSE-2.0
  15 + *
  16 + * Unless required by applicable law or agreed to in writing, software
  17 + * distributed under the License is distributed on an "AS IS" BASIS,
  18 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  19 + * See the License for the specific language governing permissions and
  20 + * limitations under the License.
  21 + */
  22 +// this file is copied and modified from
  23 +// kaldi/src/feat/resample.h
  24 +#ifndef SHERPA_ONNX_CSRC_RESAMPLE_H_
  25 +#define SHERPA_ONNX_CSRC_RESAMPLE_H_
  26 +
  27 +#include <cstdint>
  28 +#include <vector>
  29 +
  30 +namespace sherpa_onnx {
  31 +
  32 +/*
  33 + We require that the input and output sampling rate be specified as
  34 + integers, as this is an easy way to specify that their ratio be rational.
  35 +*/
  36 +
  37 +class LinearResample {
  38 + public:
  39 + /// Constructor. We make the input and output sample rates integers, because
  40 + /// we are going to need to find a common divisor. This should just remind
  41 + /// you that they need to be integers. The filter cutoff needs to be less
  42 + /// than samp_rate_in_hz/2 and less than samp_rate_out_hz/2. num_zeros
  43 + /// controls the sharpness of the filter, more == sharper but less efficient.
  44 + /// We suggest around 4 to 10 for normal use.
  45 + LinearResample(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz,
  46 + float filter_cutoff_hz, int32_t num_zeros);
  47 +
  48 + /// Calling the function Reset() resets the state of the object prior to
  49 + /// processing a new signal; it is only necessary if you have called
  50 + /// Resample(x, x_size, false, y) for some signal, leading to a remainder of
  51 + /// the signal being called, but then abandon processing the signal before
  52 + /// calling Resample(x, x_size, true, y) for the last piece. Call it
  53 + /// unnecessarily between signals will not do any harm.
  54 + void Reset();
  55 +
  56 + /// This function does the resampling. If you call it with flush == true and
  57 + /// you have never called it with flush == false, it just resamples the input
  58 + /// signal (it resizes the output to a suitable number of samples).
  59 + ///
  60 + /// You can also use this function to process a signal a piece at a time.
  61 + /// suppose you break it into piece1, piece2, ... pieceN. You can call
  62 + /// \code{.cc}
  63 + /// Resample(piece1, piece1_size, false, &output1);
  64 + /// Resample(piece2, piece2_size, false, &output2);
  65 + /// Resample(piece3, piece3_size, true, &output3);
  66 + /// \endcode
  67 + /// If you call it with flush == false, it won't output the last few samples
  68 + /// but will remember them, so that if you later give it a second piece of
  69 + /// the input signal it can process it correctly.
  70 + /// If your most recent call to the object was with flush == false, it will
  71 + /// have internal state; you can remove this by calling Reset().
  72 + /// Empty input is acceptable.
  73 + void Resample(const float *input, int32_t input_dim, bool flush,
  74 + std::vector<float> *output);
  75 +
  76 + //// Return the input and output sampling rates (for checks, for example)
  77 + int32_t GetInputSamplingRate() const { return samp_rate_in_; }
  78 + int32_t GetOutputSamplingRate() const { return samp_rate_out_; }
  79 +
  80 + private:
  81 + void SetIndexesAndWeights();
  82 +
  83 + float FilterFunc(float) const;
  84 +
  85 + /// This function outputs the number of output samples we will output
  86 + /// for a signal with "input_num_samp" input samples. If flush == true,
  87 + /// we return the largest n such that
  88 + /// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ),
  89 + /// and note that the interval is half-open. If flush == false,
  90 + /// define window_width as num_zeros / (2.0 * filter_cutoff_);
  91 + /// we return the largest n such that (n/samp_rate_out_) is in the interval
  92 + /// [ 0, input_num_samp/samp_rate_in_ - window_width ).
  93 + int64_t GetNumOutputSamples(int64_t input_num_samp, bool flush) const;
  94 +
  95 + /// Given an output-sample index, this function outputs to *first_samp_in the
  96 + /// first input-sample index that we have a weight on (may be negative),
  97 + /// and to *samp_out_wrapped the index into weights_ where we can get the
  98 + /// corresponding weights on the input.
  99 + inline void GetIndexes(int64_t samp_out, int64_t *first_samp_in,
  100 + int32_t *samp_out_wrapped) const;
  101 +
  102 + void SetRemainder(const float *input, int32_t input_dim);
  103 +
  104 + private:
  105 + // The following variables are provided by the user.
  106 + int32_t samp_rate_in_;
  107 + int32_t samp_rate_out_;
  108 + float filter_cutoff_;
  109 + int32_t num_zeros_;
  110 +
  111 + int32_t input_samples_in_unit_; ///< The number of input samples in the
  112 + ///< smallest repeating unit: num_samp_in_ =
  113 + ///< samp_rate_in_hz / Gcd(samp_rate_in_hz,
  114 + ///< samp_rate_out_hz)
  115 +
  116 + int32_t output_samples_in_unit_; ///< The number of output samples in the
  117 + ///< smallest repeating unit: num_samp_out_
  118 + ///< = samp_rate_out_hz /
  119 + ///< Gcd(samp_rate_in_hz, samp_rate_out_hz)
  120 +
  121 + /// The first input-sample index that we sum over, for this output-sample
  122 + /// index. May be negative; any truncation at the beginning is handled
  123 + /// separately. This is just for the first few output samples, but we can
  124 + /// extrapolate the correct input-sample index for arbitrary output samples.
  125 + std::vector<int32_t> first_index_;
  126 +
  127 + /// Weights on the input samples, for this output-sample index.
  128 + std::vector<std::vector<float>> weights_;
  129 +
  130 + // the following variables keep track of where we are in a particular signal,
  131 + // if it is being provided over multiple calls to Resample().
  132 +
  133 + int64_t input_sample_offset_; ///< The number of input samples we have
  134 + ///< already received for this signal
  135 + ///< (including anything in remainder_)
  136 + int64_t output_sample_offset_; ///< The number of samples we have already
  137 + ///< output for this signal.
  138 + std::vector<float> input_remainder_; ///< A small trailing part of the
  139 + ///< previously seen input signal.
  140 +};
  141 +
  142 +} // namespace sherpa_onnx
  143 +
  144 +#endif // SHERPA_ONNX_CSRC_RESAMPLE_H_
@@ -10,9 +10,6 @@ @@ -10,9 +10,6 @@
10 10
11 #include "sherpa-onnx/csrc/online-recognizer.h" 11 #include "sherpa-onnx/csrc/online-recognizer.h"
12 #include "sherpa-onnx/csrc/online-stream.h" 12 #include "sherpa-onnx/csrc/online-stream.h"
13 -#include "sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h"  
14 -#include "sherpa-onnx/csrc/online-transducer-model-config.h"  
15 -#include "sherpa-onnx/csrc/online-transducer-model.h"  
16 #include "sherpa-onnx/csrc/symbol-table.h" 13 #include "sherpa-onnx/csrc/symbol-table.h"
17 #include "sherpa-onnx/csrc/wave-reader.h" 14 #include "sherpa-onnx/csrc/wave-reader.h"
18 15
  1 +# Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/aarch64-linux-gnu.toolchain.cmake
  2 +
  3 +set(CMAKE_SYSTEM_NAME Linux)
  4 +set(CMAKE_SYSTEM_PROCESSOR aarch64)
  5 +
  6 +set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
  7 +set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
  8 +
  9 +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
  10 +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
  11 +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
  12 +
  13 +set(CMAKE_C_FLAGS "-march=armv8-a")
  14 +set(CMAKE_CXX_FLAGS "-march=armv8-a")
  15 +
  16 +# cache flags
  17 +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
  18 +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")