Committed by
GitHub
Support cross compiling for aarch64 (#52)
正在显示
12 个修改的文件
包含
860 行增加
和
22 行删除
| @@ -50,6 +50,12 @@ message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}") | @@ -50,6 +50,12 @@ message(STATUS "SHERPA_ONNX_ENABLE_PYTHON ${SHERPA_ONNX_ENABLE_PYTHON}") | ||
| 50 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") | 50 | set(CMAKE_CXX_STANDARD 14 CACHE STRING "The C++ version to be used.") |
| 51 | set(CMAKE_CXX_EXTENSIONS OFF) | 51 | set(CMAKE_CXX_EXTENSIONS OFF) |
| 52 | 52 | ||
# Probe whether the C++ compiler can include the ALSA development header.
# The result is stored in SHERPA_ONNX_HAS_ALSA; when it is true, the
# SHERPA_ONNX_ENABLE_ALSA=1 definition is added at directory scope.
include(CheckIncludeFileCXX)
check_include_file_cxx("alsa/asoundlib.h" SHERPA_ONNX_HAS_ALSA)
if(SHERPA_ONNX_HAS_ALSA)
  add_definitions("-DSHERPA_ONNX_ENABLE_ALSA=1")
endif()
| 58 | + | ||
| 53 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) | 59 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/Modules) |
| 54 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) | 60 | list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake) |
| 55 | 61 |
build-aarch64-linux-gnu.sh
0 → 100755
#!/usr/bin/env bash
#
# Cross-compile sherpa-onnx for aarch64 Linux.
#
# The script works inside ./build-aarch64-linux-gnu (created if missing):
#   1. clones and cross-compiles alsa-lib there unless libasound.so exists,
#   2. exports the alsa-lib header and library locations for the CMake build,
#   3. configures, builds and installs sherpa-onnx into ./install.
#
# It refers to the source tree as "..", so run it from the repository root.

if ! command -v aarch64-linux-gnu-gcc &> /dev/null; then
  echo "Please install a toolchain for cross-compiling."
  echo "You can refer to: "
  echo " https://k2-fsa.github.io/sherpa/onnx/install/aarch64-embedded-linux.html"
  echo "for help."
  exit 1
fi

# From here on: stop at the first failing command and echo every command.
set -ex

dir=build-aarch64-linux-gnu
mkdir -p "$dir"
cd "$dir"

if [ ! -f alsa-lib/src/.libs/libasound.so ]; then
  echo "Start to cross-compile alsa-lib"
  if [ ! -d alsa-lib ]; then
    git clone --depth 1 https://github.com/alsa-project/alsa-lib
  fi
  # If it shows:
  #  ./gitcompile: line 79: libtoolize: command not found
  # Please use:
  #  sudo apt-get install libtool m4 automake
  #
  pushd alsa-lib
  CC=aarch64-linux-gnu-gcc ./gitcompile --host=aarch64-linux-gnu
  popd
  echo "Finish cross-compiling alsa-lib"
fi

# Append the previous CPLUS_INCLUDE_PATH only when it is non-empty. A trailing
# ":" would create an empty path element, which GCC interprets as "search the
# current working directory".
export CPLUS_INCLUDE_PATH="$PWD/alsa-lib/include${CPLUS_INCLUDE_PATH:+:$CPLUS_INCLUDE_PATH}"
export SHERPA_ONNX_ALSA_LIB_DIR="$PWD/alsa-lib/src/.libs"

cmake \
  -DCMAKE_INSTALL_PREFIX=./install \
  -DCMAKE_BUILD_TYPE=Release \
  -DBUILD_SHARED_LIBS=OFF \
  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
  ..

make VERBOSE=1 -j4
make install/strip

# Enable it only if needed
# cp -v "$SHERPA_ONNX_ALSA_LIB_DIR"/libasound.so* ./install/lib/
| 1 | function(download_onnxruntime) | 1 | function(download_onnxruntime) |
| 2 | include(FetchContent) | 2 | include(FetchContent) |
| 3 | 3 | ||
| 4 | - if(UNIX AND NOT APPLE) | ||
| 5 | - # If you don't have access to the Internet, | ||
| 6 | - # please pre-download onnxruntime | ||
| 7 | - set(possible_file_locations | ||
| 8 | - $ENV{HOME}/Downloads/onnxruntime-linux-x64-1.14.0.tgz | ||
| 9 | - ${PROJECT_SOURCE_DIR}/onnxruntime-linux-x64-1.14.0.tgz | ||
| 10 | - ${PROJECT_BINARY_DIR}/onnxruntime-linux-x64-1.14.0.tgz | ||
| 11 | - /tmp/onnxruntime-linux-x64-1.14.0.tgz | ||
| 12 | - /star-fj/fangjun/download/github/onnxruntime-linux-x64-1.14.0.tgz | ||
| 13 | - ) | ||
| 14 | - | ||
| 15 | - set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-x64-1.14.0.tgz") | ||
| 16 | - set(onnxruntime_HASH "SHA256=92bf534e5fa5820c8dffe9de2850f84ed2a1c063e47c659ce09e8c7938aa2090") | ||
| 17 | - # After downloading, it contains: | ||
| 18 | - # ./lib/libonnxruntime.so.1.14.0 | ||
| 19 | - # ./lib/libonnxruntime.so, which is a symlink to lib/libonnxruntime.so.1.14.0 | ||
| 20 | - # | ||
| 21 | - # ./include | ||
| 22 | - # It contains all the needed header files | 4 | + if(CMAKE_SYSTEM_NAME STREQUAL Linux) |
| 5 | + if(CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64) | ||
| 6 | + # For embedded systems | ||
| 7 | + set(possible_file_locations | ||
| 8 | + $ENV{HOME}/Downloads/onnxruntime-linux-aarch64-1.14.0.tgz | ||
| 9 | + ${PROJECT_SOURCE_DIR}/onnxruntime-linux-aarch64-1.14.0.tgz | ||
| 10 | + ${PROJECT_BINARY_DIR}/onnxruntime-linux-aarch64-1.14.0.tgz | ||
| 11 | + /tmp/onnxruntime-linux-aarch64-1.14.0.tgz | ||
| 12 | + /star-fj/fangjun/download/github/onnxruntime-linux-aarch64-1.14.0.tgz | ||
| 13 | + ) | ||
| 14 | + set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-aarch64-1.14.0.tgz") | ||
| 15 | + set(onnxruntime_HASH "SHA256=9384d2e6e29fed693a4630303902392eead0c41bee5705ccac6d6d34a3d5db86") | ||
| 16 | + | ||
| 17 | + else() | ||
| 18 | + # If you don't have access to the Internet, | ||
| 19 | + # please pre-download onnxruntime | ||
| 20 | + set(possible_file_locations | ||
| 21 | + $ENV{HOME}/Downloads/onnxruntime-linux-x64-1.14.0.tgz | ||
| 22 | + ${PROJECT_SOURCE_DIR}/onnxruntime-linux-x64-1.14.0.tgz | ||
| 23 | + ${PROJECT_BINARY_DIR}/onnxruntime-linux-x64-1.14.0.tgz | ||
| 24 | + /tmp/onnxruntime-linux-x64-1.14.0.tgz | ||
| 25 | + /star-fj/fangjun/download/github/onnxruntime-linux-x64-1.14.0.tgz | ||
| 26 | + ) | ||
| 27 | + | ||
| 28 | + set(onnxruntime_URL "https://github.com/microsoft/onnxruntime/releases/download/v1.14.0/onnxruntime-linux-x64-1.14.0.tgz") | ||
| 29 | + set(onnxruntime_HASH "SHA256=92bf534e5fa5820c8dffe9de2850f84ed2a1c063e47c659ce09e8c7938aa2090") | ||
| 30 | + # After downloading, it contains: | ||
| 31 | + # ./lib/libonnxruntime.so.1.14.0 | ||
| 32 | + # ./lib/libonnxruntime.so, which is a symlink to lib/libonnxruntime.so.1.14.0 | ||
| 33 | + # | ||
| 34 | + # ./include | ||
| 35 | + # It contains all the needed header files | ||
| 36 | + endif() | ||
| 23 | elseif(APPLE) | 37 | elseif(APPLE) |
| 24 | # If you don't have access to the Internet, | 38 | # If you don't have access to the Internet, |
| 25 | # please pre-download onnxruntime | 39 | # please pre-download onnxruntime |
| @@ -11,6 +11,7 @@ add_library(sherpa-onnx-core | @@ -11,6 +11,7 @@ add_library(sherpa-onnx-core | ||
| 11 | online-transducer-model.cc | 11 | online-transducer-model.cc |
| 12 | online-zipformer-transducer-model.cc | 12 | online-zipformer-transducer-model.cc |
| 13 | onnx-utils.cc | 13 | onnx-utils.cc |
| 14 | + resample.cc | ||
| 14 | symbol-table.cc | 15 | symbol-table.cc |
| 15 | text-utils.cc | 16 | text-utils.cc |
| 16 | unbind.cc | 17 | unbind.cc |
| @@ -32,6 +33,18 @@ endif() | @@ -32,6 +33,18 @@ endif() | ||
| 32 | install(TARGETS sherpa-onnx-core DESTINATION lib) | 33 | install(TARGETS sherpa-onnx-core DESTINATION lib) |
| 33 | install(TARGETS sherpa-onnx DESTINATION bin) | 34 | install(TARGETS sherpa-onnx DESTINATION bin) |
| 34 | 35 | ||
# Build the ALSA microphone demo only when the ALSA header was found.
if(SHERPA_ONNX_HAS_ALSA)
  # Decide how libasound is passed to the linker. When the environment
  # variable SHERPA_ONNX_ALSA_LIB_DIR is set at configure time, that
  # directory is put on the linker search path; otherwise the plain
  # library name is used.
  if(DEFINED ENV{SHERPA_ONNX_ALSA_LIB_DIR})
    set(sherpa_onnx_alsa_link_items
      "-L$ENV{SHERPA_ONNX_ALSA_LIB_DIR}"
      -lasound
    )
  else()
    set(sherpa_onnx_alsa_link_items asound)
  endif()

  add_executable(sherpa-onnx-alsa sherpa-onnx-alsa.cc alsa.cc)
  target_link_libraries(sherpa-onnx-alsa
    PRIVATE
      sherpa-onnx-core
      ${sherpa_onnx_alsa_link_items}
  )
  install(TARGETS sherpa-onnx-alsa DESTINATION bin)
endif()
| 47 | + | ||
| 35 | if(SHERPA_ONNX_ENABLE_TESTS) | 48 | if(SHERPA_ONNX_ENABLE_TESTS) |
| 36 | set(sherpa_onnx_test_srcs | 49 | set(sherpa_onnx_test_srcs |
| 37 | cat-test.cc | 50 | cat-test.cc |
sherpa-onnx/csrc/alsa.cc
0 → 100644
| 1 | +// sherpa-onnx/csrc/alsa.cc | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2022-2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifdef SHERPA_ONNX_ENABLE_ALSA | ||
| 6 | + | ||
| 7 | +#include "sherpa-onnx/csrc/alsa.h" | ||
| 8 | + | ||
| 9 | +#include <algorithm> | ||
| 10 | + | ||
| 11 | +#include "alsa/asoundlib.h" | ||
| 12 | + | ||
| 13 | +namespace sherpa_onnx { | ||
| 14 | + | ||
// Convert interleaved 16-bit PCM samples to floats by dividing by 32768,
// keeping only the first channel of every frame.
//
// @param in            Interleaved samples. A trailing partial frame (when
//                      in.size() is not a multiple of num_channels) is
//                      ignored.
// @param num_channels  Number of samples per frame. If it is not positive,
//                      *out is emptied and nothing is converted.
// @param out           Resized to the number of complete frames in `in` and
//                      filled with the converted first-channel samples.
void ToFloat(const std::vector<int16_t> &in, int32_t num_channels,
             std::vector<float> *out) {
  if (num_channels <= 0) {
    // Avoid a division by zero below.
    out->clear();
    return;
  }

  // Iterate over complete frames only so that we never write past out->size().
  int32_t num_frames = static_cast<int32_t>(in.size() / num_channels);
  out->resize(num_frames);

  for (int32_t k = 0; k != num_frames; ++k) {
    (*out)[k] = in[k * num_channels] / 32768.;
  }
}
| 24 | + | ||
| 25 | +Alsa::Alsa(const char *device_name) { | ||
| 26 | + const char *kDeviceHelp = R"( | ||
| 27 | +Please use the command: | ||
| 28 | + | ||
| 29 | + arecord -l | ||
| 30 | + | ||
| 31 | +to list all available devices. For instance, if the output is: | ||
| 32 | + | ||
| 33 | +**** List of CAPTURE Hardware Devices **** | ||
| 34 | +card 3: UACDemoV10 [UACDemoV1.0], device 0: USB Audio [USB Audio] | ||
| 35 | + Subdevices: 1/1 | ||
| 36 | + Subdevice #0: subdevice #0 | ||
| 37 | + | ||
| 38 | +and if you want to select card 3 and the device 0 on that card, please use: | ||
| 39 | + | ||
| 40 | + hw:3,0 | ||
| 41 | + | ||
| 42 | + )"; | ||
| 43 | + | ||
| 44 | + int32_t err = | ||
| 45 | + snd_pcm_open(&capture_handle_, device_name, SND_PCM_STREAM_CAPTURE, 0); | ||
| 46 | + if (err) { | ||
| 47 | + fprintf(stderr, "Unable to open: %s. %s\n", device_name, snd_strerror(err)); | ||
| 48 | + fprintf(stderr, "%s\n", kDeviceHelp); | ||
| 49 | + exit(-1); | ||
| 50 | + } | ||
| 51 | + | ||
| 52 | + snd_pcm_hw_params_t *hw_params; | ||
| 53 | + snd_pcm_hw_params_alloca(&hw_params); | ||
| 54 | + | ||
| 55 | + err = snd_pcm_hw_params_any(capture_handle_, hw_params); | ||
| 56 | + if (err) { | ||
| 57 | + fprintf(stderr, "Failed to initialize hw_params: %s\n", snd_strerror(err)); | ||
| 58 | + exit(-1); | ||
| 59 | + } | ||
| 60 | + | ||
| 61 | + err = snd_pcm_hw_params_set_access(capture_handle_, hw_params, | ||
| 62 | + SND_PCM_ACCESS_RW_INTERLEAVED); | ||
| 63 | + if (err) { | ||
| 64 | + fprintf(stderr, "Failed to set access type: %s\n", snd_strerror(err)); | ||
| 65 | + exit(-1); | ||
| 66 | + } | ||
| 67 | + | ||
| 68 | + err = snd_pcm_hw_params_set_format(capture_handle_, hw_params, | ||
| 69 | + SND_PCM_FORMAT_S16_LE); | ||
| 70 | + if (err) { | ||
| 71 | + fprintf(stderr, "Failed to set format: %s\n", snd_strerror(err)); | ||
| 72 | + exit(-1); | ||
| 73 | + } | ||
| 74 | + | ||
| 75 | + // mono | ||
| 76 | + err = snd_pcm_hw_params_set_channels(capture_handle_, hw_params, 1); | ||
| 77 | + if (err) { | ||
| 78 | + fprintf(stderr, "Failed to set number of channels to 1. %s\n", | ||
| 79 | + snd_strerror(err)); | ||
| 80 | + | ||
| 81 | + err = snd_pcm_hw_params_set_channels(capture_handle_, hw_params, 2); | ||
| 82 | + if (err) { | ||
| 83 | + fprintf(stderr, "Failed to set number of channels to 2. %s\n", | ||
| 84 | + snd_strerror(err)); | ||
| 85 | + | ||
| 86 | + exit(-1); | ||
| 87 | + } | ||
| 88 | + actual_channel_count_ = 2; | ||
| 89 | + fprintf(stderr, | ||
| 90 | + "Channel count is set to 2. Will use only 1 channel of it.\n"); | ||
| 91 | + } | ||
| 92 | + | ||
| 93 | + uint32_t actual_sample_rate = expected_sample_rate_; | ||
| 94 | + | ||
| 95 | + int32_t dir = 0; | ||
| 96 | + err = snd_pcm_hw_params_set_rate_near(capture_handle_, hw_params, | ||
| 97 | + &actual_sample_rate, &dir); | ||
| 98 | + if (err) { | ||
| 99 | + fprintf(stderr, "Failed to set sample rate to, %d: %s\n", | ||
| 100 | + expected_sample_rate_, snd_strerror(err)); | ||
| 101 | + exit(-1); | ||
| 102 | + } | ||
| 103 | + actual_sample_rate_ = actual_sample_rate; | ||
| 104 | + | ||
| 105 | + if (actual_sample_rate_ != expected_sample_rate_) { | ||
| 106 | + fprintf(stderr, "Failed to set sample rate to %d\n", expected_sample_rate_); | ||
| 107 | + fprintf(stderr, "Current sample rate is %d\n", actual_sample_rate_); | ||
| 108 | + fprintf(stderr, | ||
| 109 | + "Creating a resampler:\n" | ||
| 110 | + " in_sample_rate: %d\n" | ||
| 111 | + " output_sample_rate: %d\n", | ||
| 112 | + actual_sample_rate_, expected_sample_rate_); | ||
| 113 | + | ||
| 114 | + float min_freq = std::min(actual_sample_rate_, expected_sample_rate_); | ||
| 115 | + float lowpass_cutoff = 0.99 * 0.5 * min_freq; | ||
| 116 | + | ||
| 117 | + int32_t lowpass_filter_width = 6; | ||
| 118 | + resampler_ = std::make_unique<LinearResample>( | ||
| 119 | + actual_sample_rate_, expected_sample_rate_, lowpass_cutoff, | ||
| 120 | + lowpass_filter_width); | ||
| 121 | + } else { | ||
| 122 | + fprintf(stderr, "Current sample rate: %d\n", actual_sample_rate_); | ||
| 123 | + } | ||
| 124 | + | ||
| 125 | + err = snd_pcm_hw_params(capture_handle_, hw_params); | ||
| 126 | + if (err) { | ||
| 127 | + fprintf(stderr, "Failed to set hw params: %s\n", snd_strerror(err)); | ||
| 128 | + exit(-1); | ||
| 129 | + } | ||
| 130 | + | ||
| 131 | + err = snd_pcm_prepare(capture_handle_); | ||
| 132 | + if (err) { | ||
| 133 | + fprintf(stderr, "Failed to prepare for recording: %s\n", snd_strerror(err)); | ||
| 134 | + exit(-1); | ||
| 135 | + } | ||
| 136 | + | ||
| 137 | + fprintf(stderr, "Recording started!\n"); | ||
| 138 | +} | ||
| 139 | + | ||
| 140 | +Alsa::~Alsa() { snd_pcm_close(capture_handle_); } | ||
| 141 | + | ||
| 142 | +const std::vector<float> &Alsa::Read(int32_t num_samples) { | ||
| 143 | + samples_.resize(num_samples * actual_channel_count_); | ||
| 144 | + | ||
| 145 | + // count is in frames. Each frame contains actual_channel_count_ samples | ||
| 146 | + int32_t count = snd_pcm_readi(capture_handle_, samples_.data(), num_samples); | ||
| 147 | + | ||
| 148 | + samples_.resize(count * actual_channel_count_); | ||
| 149 | + | ||
| 150 | + ToFloat(samples_, actual_channel_count_, &samples1_); | ||
| 151 | + | ||
| 152 | + if (!resampler_) { | ||
| 153 | + return samples1_; | ||
| 154 | + } | ||
| 155 | + | ||
| 156 | + resampler_->Resample(samples1_.data(), samples_.size(), false, &samples2_); | ||
| 157 | + return samples2_; | ||
| 158 | +} | ||
| 159 | + | ||
| 160 | +} // namespace sherpa_onnx | ||
| 161 | + | ||
| 162 | +#endif |
sherpa-onnx/csrc/alsa.h
0 → 100644
| 1 | +// sherpa-onnx/csrc/alsa.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2022-2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifndef SHERPA_ONNX_CSRC_ALSA_H_ | ||
| 6 | +#define SHERPA_ONNX_CSRC_ALSA_H_ | ||
| 7 | + | ||
| 8 | +#include <memory> | ||
| 9 | +#include <vector> | ||
| 10 | + | ||
| 11 | +#include "alsa/asoundlib.h" | ||
| 12 | +#include "sherpa-onnx/csrc/resample.h" | ||
| 13 | + | ||
| 14 | +namespace sherpa_onnx { | ||
| 15 | + | ||
| 16 | +class Alsa { | ||
| 17 | + public: | ||
| 18 | + explicit Alsa(const char *device_name); | ||
| 19 | + ~Alsa(); | ||
| 20 | + | ||
| 21 | + // This is a blocking read. | ||
| 22 | + // | ||
| 23 | + // @param num_samples Number of samples to read. | ||
| 24 | + // | ||
| 25 | + // The returned value is valid until the next call to Read(). | ||
| 26 | + const std::vector<float> &Read(int32_t num_samples); | ||
| 27 | + | ||
| 28 | + int32_t GetExpectedSampleRate() const { return expected_sample_rate_; } | ||
| 29 | + int32_t GetActualSampleRate() const { return actual_sample_rate_; } | ||
| 30 | + | ||
| 31 | + private: | ||
| 32 | + snd_pcm_t *capture_handle_; | ||
| 33 | + int32_t expected_sample_rate_ = 16000; | ||
| 34 | + int32_t actual_sample_rate_; | ||
| 35 | + | ||
| 36 | + int32_t actual_channel_count_ = 1; | ||
| 37 | + | ||
| 38 | + std::unique_ptr<LinearResample> resampler_; | ||
| 39 | + std::vector<int16_t> samples_; // directly from the microphone | ||
| 40 | + std::vector<float> samples1_; // normalized version of samples_ | ||
| 41 | + std::vector<float> samples2_; // possibly resampled from samples1_ | ||
| 42 | +}; | ||
| 43 | + | ||
| 44 | +} // namespace sherpa_onnx | ||
| 45 | + | ||
| 46 | +#endif // SHERPA_ONNX_CSRC_ALSA_H_ |
sherpa-onnx/csrc/display.h
0 → 100644
| 1 | +// sherpa-onnx/csrc/display.h | ||
| 2 | +// | ||
| 3 | +// Copyright (c) 2022-2023 Xiaomi Corporation | ||
| 4 | + | ||
| 5 | +#ifndef SHERPA_ONNX_CSRC_DISPLAY_H_ | ||
| 6 | +#define SHERPA_ONNX_CSRC_DISPLAY_H_ | ||
| 7 | +#include <stdio.h> | ||
| 8 | + | ||
| 9 | +#include <string> | ||
| 10 | + | ||
| 11 | +namespace sherpa_onnx { | ||
| 12 | + | ||
// Prints recognition results to stderr, rewriting the text of the current
// segment in place with ANSI escape sequences and wrapping long lines.
class Display {
 public:
  // Print the text `s` for segment `segment_id`.
  //
  // If `segment_id` equals the segment of the previous call, the lines
  // printed for it are erased and rewritten; otherwise a new line is
  // started. The text is treated as UTF-8 and is never split in the middle
  // of a multi-byte sequence. After max_word_per_line_ characters the output
  // wraps at the next space or non-ASCII character.
  //
  // With MSVC the text is simply printed on its own line.
  void Print(int32_t segment_id, const std::string &s) {
#ifdef _MSC_VER
    fprintf(stderr, "%d:%s\n", segment_id, s.c_str());
    return;
#endif
    if (last_segment_ == segment_id) {
      Clear();
    } else {
      if (last_segment_ != -1) {
        fprintf(stderr, "\n\r");
      }
      last_segment_ = segment_id;
      num_previous_lines_ = 0;
    }

    fprintf(stderr, "\r%d:", segment_id);

    int32_t num_chars_in_line = 0;
    for (size_t n = 0; n < s.size();) {
      // Number of bytes of the character starting at n, clamped so that a
      // truncated sequence at the end of s is never read past the end.
      size_t len = Utf8SequenceLength(static_cast<unsigned char>(s[n]));
      if (len > s.size() - n) {
        len = s.size() - n;
      }
      fprintf(stderr, "%.*s", static_cast<int>(len), s.data() + n);
      n += len;

      ++num_chars_in_line;
      // Compare as unsigned char: plain char is unsigned on some targets
      // (e.g., aarch64 Linux), where a "< 0" test would never be true.
      if (num_chars_in_line >= max_word_per_line_ && n + 1 < s.size() &&
          (s[n] == ' ' || static_cast<unsigned char>(s[n]) >= 0x80)) {
        fprintf(stderr, "\n\r ");
        ++num_previous_lines_;
        num_chars_in_line = 0;
      }
    }
  }

 private:
  // Return the length in bytes of the UTF-8 sequence introduced by `lead`.
  // ASCII bytes, stray continuation bytes and invalid lead bytes count as 1.
  static size_t Utf8SequenceLength(unsigned char lead) {
    if (lead >= 0xF0 && lead <= 0xF7) return 4;
    if (lead >= 0xE0 && lead <= 0xEF) return 3;
    if (lead >= 0xC0 && lead <= 0xDF) return 2;
    return 1;
  }

  // Clear the output for the current segment
  void Clear() {
    ClearCurrentLine();
    while (num_previous_lines_ > 0) {
      GoUpOneLine();
      ClearCurrentLine();
      --num_previous_lines_;
    }
  }

  // Clear the current line
  void ClearCurrentLine() const { fprintf(stderr, "\33[2K\r"); }

  // Move the cursor to the previous line
  void GoUpOneLine() const { fprintf(stderr, "\033[1A\r"); }

 private:
  int32_t max_word_per_line_ = 60;   // characters printed before wrapping
  int32_t num_previous_lines_ = 0;   // wrapped lines above the current one
  int32_t last_segment_ = -1;        // segment of the last Print(); -1: none
};
| 76 | + | ||
| 77 | +} // namespace sherpa_onnx | ||
| 78 | + | ||
| 79 | +#endif // SHERPA_ONNX_CSRC_DISPLAY_H_ |
sherpa-onnx/csrc/resample.cc
0 → 100644
| 1 | +/** | ||
| 2 | + * Copyright 2013 Pegah Ghahremani | ||
| 3 | + * 2014 IMSL, PKU-HKUST (author: Wei Shi) | ||
| 4 | + * 2014 Yanqing Sun, Junjie Wang | ||
| 5 | + * 2014 Johns Hopkins University (author: Daniel Povey) | ||
| 6 | + * Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 7 | + * | ||
| 8 | + * See LICENSE for clarification regarding multiple authors | ||
| 9 | + * | ||
| 10 | + * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 11 | + * you may not use this file except in compliance with the License. | ||
| 12 | + * You may obtain a copy of the License at | ||
| 13 | + * | ||
| 14 | + * http://www.apache.org/licenses/LICENSE-2.0 | ||
| 15 | + * | ||
| 16 | + * Unless required by applicable law or agreed to in writing, software | ||
| 17 | + * distributed under the License is distributed on an "AS IS" BASIS, | ||
| 18 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 19 | + * See the License for the specific language governing permissions and | ||
| 20 | + * limitations under the License. | ||
| 21 | + */ | ||
| 22 | +// this file is copied and modified from | ||
| 23 | +// kaldi/src/feat/resample.cc | ||
| 24 | + | ||
| 25 | +#include "sherpa-onnx/csrc/resample.h" | ||
| 26 | + | ||
| 27 | +#include <assert.h> | ||
| 28 | +#include <math.h> | ||
| 29 | +#include <stdio.h> | ||
| 30 | + | ||
| 31 | +#include <cstdlib> | ||
| 32 | +#include <type_traits> | ||
| 33 | + | ||
| 34 | +#ifndef M_2PI | ||
| 35 | +#define M_2PI 6.283185307179586476925286766559005 | ||
| 36 | +#endif | ||
| 37 | + | ||
| 38 | +#ifndef M_PI | ||
| 39 | +#define M_PI 3.1415926535897932384626433832795 | ||
| 40 | +#endif | ||
| 41 | + | ||
| 42 | +namespace sherpa_onnx { | ||
| 43 | + | ||
// Greatest common divisor of two integers, computed with the Euclidean
// algorithm (adapted from kaldi/src/base/kaldi-math.h).
//
// The result is never negative. If exactly one argument is zero, the absolute
// value of the other one is returned. If both are zero, an error is printed
// to stderr and the process exits.
template <class I>
I Gcd(I m, I n) {
  static_assert(std::is_integral<I>::value, "");

  if (m == 0 && n == 0) {  // gcd not defined, as all integers are divisors.
    fprintf(stderr, "Undefined GCD since m = 0, n = 0.");
    exit(-1);
  }
  if (m == 0) return n > 0 ? n : -n;
  if (n == 0) return m > 0 ? m : -m;

  // Alternate the roles of m and n until one remainder becomes zero; the
  // other value is then the gcd up to sign. It is left in n.
  for (;;) {
    m %= n;
    if (m == 0) break;

    n %= m;
    if (n == 0) {
      n = m;
      break;
    }
  }
  return n > 0 ? n : -n;
}
| 65 | + | ||
/// Least common multiple of two integers (adapted from
/// kaldi/src/base/kaldi-math.h). Both inputs must be positive; this is
/// checked with assert().
template <class I>
I Lcm(I m, I n) {
  assert(m > 0 && n > 0);

  const I divisor = Gcd(m, n);
  const I m_reduced = m / divisor;
  const I n_reduced = n / divisor;
  return divisor * m_reduced * n_reduced;
}
| 75 | + | ||
// Sum of a[i] * b[i] over the first n elements, accumulated in single
// precision from index 0 upwards.
static float DotProduct(const float *a, const float *b, int32_t n) {
  float acc = 0;
  const float *const a_end = a + n;
  while (a != a_end) {
    acc += *a * *b;
    ++a;
    ++b;
  }
  return acc;
}
| 83 | + | ||
| 84 | +LinearResample::LinearResample(int32_t samp_rate_in_hz, | ||
| 85 | + int32_t samp_rate_out_hz, float filter_cutoff_hz, | ||
| 86 | + int32_t num_zeros) | ||
| 87 | + : samp_rate_in_(samp_rate_in_hz), | ||
| 88 | + samp_rate_out_(samp_rate_out_hz), | ||
| 89 | + filter_cutoff_(filter_cutoff_hz), | ||
| 90 | + num_zeros_(num_zeros) { | ||
| 91 | + assert(samp_rate_in_hz > 0.0 && samp_rate_out_hz > 0.0 && | ||
| 92 | + filter_cutoff_hz > 0.0 && filter_cutoff_hz * 2 <= samp_rate_in_hz && | ||
| 93 | + filter_cutoff_hz * 2 <= samp_rate_out_hz && num_zeros > 0); | ||
| 94 | + | ||
| 95 | + // base_freq is the frequency of the repeating unit, which is the gcd | ||
| 96 | + // of the input frequencies. | ||
| 97 | + int32_t base_freq = Gcd(samp_rate_in_, samp_rate_out_); | ||
| 98 | + input_samples_in_unit_ = samp_rate_in_ / base_freq; | ||
| 99 | + output_samples_in_unit_ = samp_rate_out_ / base_freq; | ||
| 100 | + | ||
| 101 | + SetIndexesAndWeights(); | ||
| 102 | + Reset(); | ||
| 103 | +} | ||
| 104 | + | ||
| 105 | +void LinearResample::SetIndexesAndWeights() { | ||
| 106 | + first_index_.resize(output_samples_in_unit_); | ||
| 107 | + weights_.resize(output_samples_in_unit_); | ||
| 108 | + | ||
| 109 | + double window_width = num_zeros_ / (2.0 * filter_cutoff_); | ||
| 110 | + | ||
| 111 | + for (int32_t i = 0; i < output_samples_in_unit_; i++) { | ||
| 112 | + double output_t = i / static_cast<double>(samp_rate_out_); | ||
| 113 | + double min_t = output_t - window_width, max_t = output_t + window_width; | ||
| 114 | + // we do ceil on the min and floor on the max, because if we did it | ||
| 115 | + // the other way around we would unnecessarily include indexes just | ||
| 116 | + // outside the window, with zero coefficients. It's possible | ||
| 117 | + // if the arguments to the ceil and floor expressions are integers | ||
| 118 | + // (e.g. if filter_cutoff_ has an exact ratio with the sample rates), | ||
| 119 | + // that we unnecessarily include something with a zero coefficient, | ||
| 120 | + // but this is only a slight efficiency issue. | ||
| 121 | + int32_t min_input_index = ceil(min_t * samp_rate_in_), | ||
| 122 | + max_input_index = floor(max_t * samp_rate_in_), | ||
| 123 | + num_indices = max_input_index - min_input_index + 1; | ||
| 124 | + first_index_[i] = min_input_index; | ||
| 125 | + weights_[i].resize(num_indices); | ||
| 126 | + for (int32_t j = 0; j < num_indices; j++) { | ||
| 127 | + int32_t input_index = min_input_index + j; | ||
| 128 | + double input_t = input_index / static_cast<double>(samp_rate_in_), | ||
| 129 | + delta_t = input_t - output_t; | ||
| 130 | + // sign of delta_t doesn't matter. | ||
| 131 | + weights_[i][j] = FilterFunc(delta_t) / samp_rate_in_; | ||
| 132 | + } | ||
| 133 | + } | ||
| 134 | +} | ||
| 135 | + | ||
| 136 | +/** Here, t is a time in seconds representing an offset from | ||
| 137 | + the center of the windowed filter function, and FilterFunction(t) | ||
| 138 | + returns the windowed filter function, described | ||
| 139 | + in the header as h(t) = f(t)g(t), evaluated at t. | ||
| 140 | +*/ | ||
| 141 | +float LinearResample::FilterFunc(float t) const { | ||
| 142 | + float window, // raised-cosine (Hanning) window of width | ||
| 143 | + // num_zeros_/2*filter_cutoff_ | ||
| 144 | + filter; // sinc filter function | ||
| 145 | + if (fabs(t) < num_zeros_ / (2.0 * filter_cutoff_)) | ||
| 146 | + window = 0.5 * (1 + cos(M_2PI * filter_cutoff_ / num_zeros_ * t)); | ||
| 147 | + else | ||
| 148 | + window = 0.0; // outside support of window function | ||
| 149 | + if (t != 0) | ||
| 150 | + filter = sin(M_2PI * filter_cutoff_ * t) / (M_PI * t); | ||
| 151 | + else | ||
| 152 | + filter = 2 * filter_cutoff_; // limit of the function at t = 0 | ||
| 153 | + return filter * window; | ||
| 154 | +} | ||
| 155 | + | ||
| 156 | +void LinearResample::Reset() { | ||
| 157 | + input_sample_offset_ = 0; | ||
| 158 | + output_sample_offset_ = 0; | ||
| 159 | + input_remainder_.resize(0); | ||
| 160 | +} | ||
| 161 | + | ||
| 162 | +void LinearResample::Resample(const float *input, int32_t input_dim, bool flush, | ||
| 163 | + std::vector<float> *output) { | ||
| 164 | + int64_t tot_input_samp = input_sample_offset_ + input_dim, | ||
| 165 | + tot_output_samp = GetNumOutputSamples(tot_input_samp, flush); | ||
| 166 | + | ||
| 167 | + assert(tot_output_samp >= output_sample_offset_); | ||
| 168 | + | ||
| 169 | + output->resize(tot_output_samp - output_sample_offset_); | ||
| 170 | + | ||
| 171 | + // samp_out is the index into the total output signal, not just the part | ||
| 172 | + // of it we are producing here. | ||
| 173 | + for (int64_t samp_out = output_sample_offset_; samp_out < tot_output_samp; | ||
| 174 | + samp_out++) { | ||
| 175 | + int64_t first_samp_in; | ||
| 176 | + int32_t samp_out_wrapped; | ||
| 177 | + GetIndexes(samp_out, &first_samp_in, &samp_out_wrapped); | ||
| 178 | + const std::vector<float> &weights = weights_[samp_out_wrapped]; | ||
| 179 | + // first_input_index is the first index into "input" that we have a weight | ||
| 180 | + // for. | ||
| 181 | + int32_t first_input_index = | ||
| 182 | + static_cast<int32_t>(first_samp_in - input_sample_offset_); | ||
| 183 | + float this_output; | ||
| 184 | + if (first_input_index >= 0 && | ||
| 185 | + first_input_index + static_cast<int32_t>(weights.size()) <= input_dim) { | ||
| 186 | + this_output = | ||
| 187 | + DotProduct(input + first_input_index, weights.data(), weights.size()); | ||
| 188 | + } else { // Handle edge cases. | ||
| 189 | + this_output = 0.0; | ||
| 190 | + for (int32_t i = 0; i < static_cast<int32_t>(weights.size()); i++) { | ||
| 191 | + float weight = weights[i]; | ||
| 192 | + int32_t input_index = first_input_index + i; | ||
| 193 | + if (input_index < 0 && | ||
| 194 | + static_cast<int32_t>(input_remainder_.size()) + input_index >= 0) { | ||
| 195 | + this_output += | ||
| 196 | + weight * input_remainder_[input_remainder_.size() + input_index]; | ||
| 197 | + } else if (input_index >= 0 && input_index < input_dim) { | ||
| 198 | + this_output += weight * input[input_index]; | ||
| 199 | + } else if (input_index >= input_dim) { | ||
| 200 | + // We're past the end of the input and are adding zero; should only | ||
| 201 | + // happen if the user specified flush == true, or else we would not | ||
| 202 | + // be trying to output this sample. | ||
| 203 | + assert(flush); | ||
| 204 | + } | ||
| 205 | + } | ||
| 206 | + } | ||
| 207 | + int32_t output_index = | ||
| 208 | + static_cast<int32_t>(samp_out - output_sample_offset_); | ||
| 209 | + (*output)[output_index] = this_output; | ||
| 210 | + } | ||
| 211 | + | ||
| 212 | + if (flush) { | ||
| 213 | + Reset(); // Reset the internal state. | ||
| 214 | + } else { | ||
| 215 | + SetRemainder(input, input_dim); | ||
| 216 | + input_sample_offset_ = tot_input_samp; | ||
| 217 | + output_sample_offset_ = tot_output_samp; | ||
| 218 | + } | ||
| 219 | +} | ||
| 220 | + | ||
| 221 | +int64_t LinearResample::GetNumOutputSamples(int64_t input_num_samp, | ||
| 222 | + bool flush) const { | ||
| 223 | + // For exact computation, we measure time in "ticks" of 1.0 / tick_freq, | ||
| 224 | + // where tick_freq is the least common multiple of samp_rate_in_ and | ||
| 225 | + // samp_rate_out_. | ||
| 226 | + int32_t tick_freq = Lcm(samp_rate_in_, samp_rate_out_); | ||
| 227 | + int32_t ticks_per_input_period = tick_freq / samp_rate_in_; | ||
| 228 | + | ||
| 229 | + // work out the number of ticks in the time interval | ||
| 230 | + // [ 0, input_num_samp/samp_rate_in_ ). | ||
| 231 | + int64_t interval_length_in_ticks = input_num_samp * ticks_per_input_period; | ||
| 232 | + if (!flush) { | ||
| 233 | + float window_width = num_zeros_ / (2.0 * filter_cutoff_); | ||
| 234 | + // To count the window-width in ticks we take the floor. This | ||
| 235 | + // is because since we're looking for the largest integer num-out-samp | ||
| 236 | + // that fits in the interval, which is open on the right, a reduction | ||
| 237 | + // in interval length of less than a tick will never make a difference. | ||
| 238 | + // For example, the largest integer in the interval [ 0, 2 ) and the | ||
| 239 | + // largest integer in the interval [ 0, 2 - 0.9 ) are the same (both one). | ||
| 240 | + // So when we're subtracting the window-width we can ignore the fractional | ||
| 241 | + // part. | ||
| 242 | + int32_t window_width_ticks = floor(window_width * tick_freq); | ||
| 243 | + // The time-period of the output that we can sample gets reduced | ||
| 244 | + // by the window-width (which is actually the distance from the | ||
| 245 | + // center to the edge of the windowing function) if we're not | ||
| 246 | + // "flushing the output". | ||
| 247 | + interval_length_in_ticks -= window_width_ticks; | ||
| 248 | + } | ||
| 249 | + if (interval_length_in_ticks <= 0) return 0; | ||
| 250 | + | ||
| 251 | + int32_t ticks_per_output_period = tick_freq / samp_rate_out_; | ||
| 252 | + // Get the last output-sample in the closed interval, i.e. replacing [ ) with | ||
| 253 | + // [ ]. Note: integer division rounds down. See | ||
| 254 | + // http://en.wikipedia.org/wiki/Interval_(mathematics) for an explanation of | ||
| 255 | + // the notation. | ||
| 256 | + int64_t last_output_samp = interval_length_in_ticks / ticks_per_output_period; | ||
| 257 | + // We need the last output-sample in the open interval, so if it takes us to | ||
| 258 | + // the end of the interval exactly, subtract one. | ||
| 259 | + if (last_output_samp * ticks_per_output_period == interval_length_in_ticks) | ||
| 260 | + last_output_samp--; | ||
| 261 | + | ||
| 262 | + // First output-sample index is zero, so the number of output samples | ||
| 263 | + // is the last output-sample plus one. | ||
| 264 | + int64_t num_output_samp = last_output_samp + 1; | ||
| 265 | + return num_output_samp; | ||
| 266 | +} | ||
| 267 | + | ||
| 268 | +// inline. Maps an output-sample index to its index within the repeating unit and to the first input sample it uses. | | |
| 269 | +void LinearResample::GetIndexes(int64_t samp_out, int64_t *first_samp_in, | | |
| 270 | + int32_t *samp_out_wrapped) const { | | |
| 271 | + // A unit is the smallest nonzero amount of time that is an exact | | |
| 272 | + // multiple of the input and output sample periods. The unit index | | |
| 273 | + // is the answer to "which numbered unit we are in" (integer division). | | |
| 274 | + int64_t unit_index = samp_out / output_samples_in_unit_; | | |
| 275 | + // samp_out_wrapped is equal to samp_out % output_samples_in_unit_; it is the index used into first_index_ below. | | |
| 276 | + *samp_out_wrapped = | | |
| 277 | + static_cast<int32_t>(samp_out - unit_index * output_samples_in_unit_); | | |
| 278 | + *first_samp_in = | | |
| 279 | + first_index_[*samp_out_wrapped] + unit_index * input_samples_in_unit_; | | |
| 280 | +} | | |
| 281 | + | ||
| 282 | +void LinearResample::SetRemainder(const float *input, int32_t input_dim) { | | |
| 283 | + std::vector<float> old_remainder(input_remainder_); | | |
| 284 | + // Rebuild input_remainder_ from the tail of "input" and, where "input" is too short, from the old remainder. | | |
| 285 | + // max_remainder_needed is the width of the filter from side to side, measured in input samples. You might think | | |
| 286 | + // it should be half that, but you have to consider that you might be wanting to output samples | | |
| 287 | + // that are "in the past" relative to the beginning of the latest | | |
| 288 | + // input... anyway, storing more remainder than needed is not harmful. | | |
| 289 | + int32_t max_remainder_needed = | | |
| 290 | + ceil(samp_rate_in_ * num_zeros_ / filter_cutoff_); | | |
| 291 | + input_remainder_.resize(max_remainder_needed); | | |
| 292 | + for (int32_t index = -static_cast<int32_t>(input_remainder_.size()); | | |
| 293 | + index < 0; index++) { | | |
| 294 | + // We interpret "index" as a (negative) offset from the end of "input" and | | |
| 295 | + // from the end of input_remainder_. | | |
| 296 | + int32_t input_index = index + input_dim; | | |
| 297 | + if (input_index >= 0) { | | |
| 298 | + input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] = | | |
| 299 | + input[input_index]; | | |
| 300 | + } else if (input_index + static_cast<int32_t>(old_remainder.size()) >= 0) { | | |
| 301 | + input_remainder_[index + static_cast<int32_t>(input_remainder_.size())] = | | |
| 302 | + old_remainder[input_index + | | |
| 303 | + static_cast<int32_t>(old_remainder.size())]; | | |
| 304 | + // Otherwise (no else branch) the element is left as is: zero when resize() has just added it. | | |
| 305 | + } | | |
| 306 | + } | | |
| 307 | +} | | |
| 308 | + | ||
| 309 | +} // namespace sherpa_onnx |
sherpa-onnx/csrc/resample.h
0 → 100644
| 1 | +/** | ||
| 2 | + * Copyright 2013 Pegah Ghahremani | ||
| 3 | + * 2014 IMSL, PKU-HKUST (author: Wei Shi) | ||
| 4 | + * 2014 Yanqing Sun, Junjie Wang | ||
| 5 | + * 2014 Johns Hopkins University (author: Daniel Povey) | ||
| 6 | + * Copyright 2023 Xiaomi Corporation (authors: Fangjun Kuang) | ||
| 7 | + * | ||
| 8 | + * See LICENSE for clarification regarding multiple authors | ||
| 9 | + * | ||
| 10 | + * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 11 | + * you may not use this file except in compliance with the License. | ||
| 12 | + * You may obtain a copy of the License at | ||
| 13 | + * | ||
| 14 | + * http://www.apache.org/licenses/LICENSE-2.0 | ||
| 15 | + * | ||
| 16 | + * Unless required by applicable law or agreed to in writing, software | ||
| 17 | + * distributed under the License is distributed on an "AS IS" BASIS, | ||
| 18 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 19 | + * See the License for the specific language governing permissions and | ||
| 20 | + * limitations under the License. | ||
| 21 | + */ | ||
| 22 | +// this file is copied and modified from | ||
| 23 | +// kaldi/src/feat/resample.h | ||
| 24 | +#ifndef SHERPA_ONNX_CSRC_RESAMPLE_H_ | ||
| 25 | +#define SHERPA_ONNX_CSRC_RESAMPLE_H_ | ||
| 26 | + | ||
| 27 | +#include <cstdint> | ||
| 28 | +#include <vector> | ||
| 29 | + | ||
| 30 | +namespace sherpa_onnx { | ||
| 31 | + | ||
| 32 | +/* | | |
| 33 | + LinearResample resamples a signal from an input sampling rate to an output sampling rate. We require that the input and output sampling rate be specified as | | |
| 34 | + integers, as this is an easy way to specify that their ratio be rational. | | |
| 35 | +*/ | | |
| 36 | + | | |
| 37 | +class LinearResample { | | |
| 38 | + public: | | |
| 39 | + /// Constructor. We make the input and output sample rates integers, because | | |
| 40 | + /// we are going to need to find a common divisor. This should just remind | | |
| 41 | + /// you that they need to be integers. The filter cutoff needs to be less | | |
| 42 | + /// than samp_rate_in_hz/2 and less than samp_rate_out_hz/2. num_zeros | | |
| 43 | + /// controls the sharpness of the filter, more == sharper but less efficient. | | |
| 44 | + /// We suggest around 4 to 10 for normal use. | | |
| 45 | + LinearResample(int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, | | |
| 46 | + float filter_cutoff_hz, int32_t num_zeros); | | |
| 47 | + | | |
| 48 | + /// Calling the function Reset() resets the state of the object prior to | | |
| 49 | + /// processing a new signal; it is only necessary if you have called | | |
| 50 | + /// Resample(x, x_size, false, y) for some signal, leading to a remainder of | | |
| 51 | + /// the signal being kept, but then abandon processing the signal before | | |
| 52 | + /// calling Resample(x, x_size, true, y) for the last piece. Calling it | | |
| 53 | + /// unnecessarily between signals will not do any harm. | | |
| 54 | + void Reset(); | | |
| 55 | + | | |
| 56 | + /// This function does the resampling. If you call it with flush == true and | | |
| 57 | + /// you have never called it with flush == false, it just resamples the input | | |
| 58 | + /// signal (it resizes the output to a suitable number of samples). | | |
| 59 | + /// | | |
| 60 | + /// You can also use this function to process a signal a piece at a time. | | |
| 61 | + /// Suppose you break it into piece1, piece2, ... pieceN. You can call | | |
| 62 | + /// \code{.cc} | | |
| 63 | + /// Resample(piece1, piece1_size, false, &output1); | | |
| 64 | + /// Resample(piece2, piece2_size, false, &output2); | | |
| 65 | + /// Resample(piece3, piece3_size, true, &output3); | | |
| 66 | + /// \endcode | | |
| 67 | + /// If you call it with flush == false, it won't output the last few samples | | |
| 68 | + /// but will remember them, so that if you later give it a second piece of | | |
| 69 | + /// the input signal it can process it correctly. | | |
| 70 | + /// If your most recent call to the object was with flush == false, it will | | |
| 71 | + /// have internal state; you can remove this by calling Reset(). | | |
| 72 | + /// Empty input is acceptable. | | |
| 73 | + void Resample(const float *input, int32_t input_dim, bool flush, | | |
| 74 | + std::vector<float> *output); | | |
| 75 | + | | |
| 76 | + /// Return the input and output sampling rates (for checks, for example) | | |
| 77 | + int32_t GetInputSamplingRate() const { return samp_rate_in_; } | | |
| 78 | + int32_t GetOutputSamplingRate() const { return samp_rate_out_; } | | |
| 79 | + | | |
| 80 | + private: | | |
| 81 | + void SetIndexesAndWeights(); | | |
| 82 | + | | |
| 83 | + float FilterFunc(float) const; | | |
| 84 | + | | |
| 85 | + /// This function outputs the number of output samples we will output | | |
| 86 | + /// for a signal with "input_num_samp" input samples. If flush == true, | | |
| 87 | + /// we return the number of integers n >= 0 such that | | |
| 88 | + /// (n/samp_rate_out_) is in the interval [ 0, input_num_samp/samp_rate_in_ ), | | |
| 89 | + /// and note that the interval is half-open. If flush == false, | | |
| 90 | + /// define window_width as num_zeros / (2.0 * filter_cutoff_); | | |
| 91 | + /// we return the number of integers n >= 0 such that (n/samp_rate_out_) is in | | |
| 92 | + /// the interval [ 0, input_num_samp/samp_rate_in_ - window_width ). | | |
| 93 | + int64_t GetNumOutputSamples(int64_t input_num_samp, bool flush) const; | | |
| 94 | + | | |
| 95 | + /// Given an output-sample index, this function outputs to *first_samp_in the | | |
| 96 | + /// first input-sample index that we have a weight on (may be negative), | | |
| 97 | + /// and to *samp_out_wrapped the index into weights_ where we can get the | | |
| 98 | + /// corresponding weights on the input. | | |
| 99 | + inline void GetIndexes(int64_t samp_out, int64_t *first_samp_in, | | |
| 100 | + int32_t *samp_out_wrapped) const; | | |
| 101 | + | | |
| 102 | + void SetRemainder(const float *input, int32_t input_dim); | | |
| 103 | + | | |
| 104 | + private: | | |
| 105 | + // The following variables are provided by the user. | | |
| 106 | + int32_t samp_rate_in_; | | |
| 107 | + int32_t samp_rate_out_; | | |
| 108 | + float filter_cutoff_; | | |
| 109 | + int32_t num_zeros_; | | |
| 110 | + | | |
| 111 | + int32_t input_samples_in_unit_; ///< The number of input samples in the | | |
| 112 | + ///< smallest repeating unit: input_samples_in_unit_ = | | |
| 113 | + ///< samp_rate_in_hz / Gcd(samp_rate_in_hz, | | |
| 114 | + ///< samp_rate_out_hz) | | |
| 115 | + | | |
| 116 | + int32_t output_samples_in_unit_; ///< The number of output samples in the | | |
| 117 | + ///< smallest repeating unit: output_samples_in_unit_ | | |
| 118 | + ///< = samp_rate_out_hz / | | |
| 119 | + ///< Gcd(samp_rate_in_hz, samp_rate_out_hz) | | |
| 120 | + | | |
| 121 | + /// The first input-sample index that we sum over, for this output-sample | | |
| 122 | + /// index. May be negative; any truncation at the beginning is handled | | |
| 123 | + /// separately. This is just for the first few output samples, but we can | | |
| 124 | + /// extrapolate the correct input-sample index for arbitrary output samples. | | |
| 125 | + std::vector<int32_t> first_index_; | | |
| 126 | + | | |
| 127 | + /// Weights on the input samples, for this output-sample index. | | |
| 128 | + std::vector<std::vector<float>> weights_; | | |
| 129 | + | | |
| 130 | + // The following variables keep track of where we are in a particular signal, | | |
| 131 | + // if it is being provided over multiple calls to Resample(). | | |
| 132 | + | | |
| 133 | + int64_t input_sample_offset_; ///< The number of input samples we have | | |
| 134 | + ///< already received for this signal | | |
| 135 | + ///< (including anything in input_remainder_) | | |
| 136 | + int64_t output_sample_offset_; ///< The number of samples we have already | | |
| 137 | + ///< output for this signal. | | |
| 138 | + std::vector<float> input_remainder_; ///< A small trailing part of the | | |
| 139 | + ///< previously seen input signal. | | |
| 140 | +}; | | |
| 141 | + | ||
| 142 | +} // namespace sherpa_onnx | ||
| 143 | + | ||
| 144 | +#endif // SHERPA_ONNX_CSRC_RESAMPLE_H_ |
| @@ -10,9 +10,6 @@ | @@ -10,9 +10,6 @@ | ||
| 10 | 10 | ||
| 11 | #include "sherpa-onnx/csrc/online-recognizer.h" | 11 | #include "sherpa-onnx/csrc/online-recognizer.h" |
| 12 | #include "sherpa-onnx/csrc/online-stream.h" | 12 | #include "sherpa-onnx/csrc/online-stream.h" |
| 13 | -#include "sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h" | ||
| 14 | -#include "sherpa-onnx/csrc/online-transducer-model-config.h" | ||
| 15 | -#include "sherpa-onnx/csrc/online-transducer-model.h" | ||
| 16 | #include "sherpa-onnx/csrc/symbol-table.h" | 13 | #include "sherpa-onnx/csrc/symbol-table.h" |
| 17 | #include "sherpa-onnx/csrc/wave-reader.h" | 14 | #include "sherpa-onnx/csrc/wave-reader.h" |
| 18 | 15 |
toolchains/aarch64-linux-gnu.toolchain.cmake
0 → 100644
| 1 | +# CMake toolchain file for cross-compiling to aarch64 Linux with the aarch64-linux-gnu GCC compilers. Copied from https://github.com/Tencent/ncnn/blob/master/toolchains/aarch64-linux-gnu.toolchain.cmake | | |
| 2 | + | | |
| 3 | +set(CMAKE_SYSTEM_NAME Linux) | | |
| 4 | +set(CMAKE_SYSTEM_PROCESSOR aarch64) | | |
| 5 | + | | |
| 6 | +set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") | | |
| 7 | +set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") | | |
| 8 | + | | |
| 9 | +set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) | | |
| 10 | +set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) | | |
| 11 | +set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) | | |
| 12 | + | | |
| 13 | +set(CMAKE_C_FLAGS "-march=armv8-a") | | |
| 14 | +set(CMAKE_CXX_FLAGS "-march=armv8-a") | | |
| 15 | + | | |
| 16 | +# Store the C and C++ flags set above in the CMake cache as STRING entries. | | |
| 17 | +set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags") | | |
| 18 | +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags") |
-
请 注册 或 登录 后发表评论