Committed by
GitHub
Update kaldi-native-fbank. (#2259)
It now supports FFT sizes that are any even number, not only powers of 2.
正在显示 9 个修改的文件，包含 28 行增加和 16 行删除
| 1 | function(download_kaldi_native_fbank) | 1 | function(download_kaldi_native_fbank) |
| 2 | include(FetchContent) | 2 | include(FetchContent) |
| 3 | 3 | ||
| 4 | - set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.21.1.tar.gz") | ||
| 5 | - set(kaldi_native_fbank_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.21.1.tar.gz") | ||
| 6 | - set(kaldi_native_fbank_HASH "SHA256=37c1aa230b00fe062791d800d8fc50aa3de215918d3dce6440699e67275d859e") | 4 | + set(kaldi_native_fbank_URL "https://github.com/csukuangfj/kaldi-native-fbank/archive/refs/tags/v1.21.2.tar.gz") |
| 5 | + set(kaldi_native_fbank_URL2 "https://hf-mirror.com/csukuangfj/sherpa-onnx-cmake-deps/resolve/main/kaldi-native-fbank-1.21.2.tar.gz") | ||
| 6 | + set(kaldi_native_fbank_HASH "SHA256=f4bd7d53fe8aeaecc4eda9680c72696bb86bf74e86371d81aacacd6f4ca3914d") | ||
| 7 | 7 | ||
| 8 | set(KALDI_NATIVE_FBANK_BUILD_TESTS OFF CACHE BOOL "" FORCE) | 8 | set(KALDI_NATIVE_FBANK_BUILD_TESTS OFF CACHE BOOL "" FORCE) |
| 9 | set(KALDI_NATIVE_FBANK_BUILD_PYTHON OFF CACHE BOOL "" FORCE) | 9 | set(KALDI_NATIVE_FBANK_BUILD_PYTHON OFF CACHE BOOL "" FORCE) |
| @@ -12,11 +12,11 @@ function(download_kaldi_native_fbank) | @@ -12,11 +12,11 @@ function(download_kaldi_native_fbank) | ||
| 12 | # If you don't have access to the Internet, | 12 | # If you don't have access to the Internet, |
| 13 | # please pre-download kaldi-native-fbank | 13 | # please pre-download kaldi-native-fbank |
| 14 | set(possible_file_locations | 14 | set(possible_file_locations |
| 15 | - $ENV{HOME}/Downloads/kaldi-native-fbank-1.21.1.tar.gz | ||
| 16 | - ${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.21.1.tar.gz | ||
| 17 | - ${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.21.1.tar.gz | ||
| 18 | - /tmp/kaldi-native-fbank-1.21.1.tar.gz | ||
| 19 | - /star-fj/fangjun/download/github/kaldi-native-fbank-1.21.1.tar.gz | 15 | + $ENV{HOME}/Downloads/kaldi-native-fbank-1.21.2.tar.gz |
| 16 | + ${CMAKE_SOURCE_DIR}/kaldi-native-fbank-1.21.2.tar.gz | ||
| 17 | + ${CMAKE_BINARY_DIR}/kaldi-native-fbank-1.21.2.tar.gz | ||
| 18 | + /tmp/kaldi-native-fbank-1.21.2.tar.gz | ||
| 19 | + /star-fj/fangjun/download/github/kaldi-native-fbank-1.21.2.tar.gz | ||
| 20 | ) | 20 | ) |
| 21 | 21 | ||
| 22 | foreach(f IN LISTS possible_file_locations) | 22 | foreach(f IN LISTS possible_file_locations) |
| @@ -22,4 +22,4 @@ Cflags: -I"${includedir}" | @@ -22,4 +22,4 @@ Cflags: -I"${includedir}" | ||
| 22 | # Note: -lcargs is required only for the following file | 22 | # Note: -lcargs is required only for the following file |
| 23 | # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c | 23 | # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c |
| 24 | # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c | 24 | # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c |
| 25 | -Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ | 25 | +Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fst -lkaldi-native-fbank-core -lkissfft-float -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ |
| @@ -22,4 +22,4 @@ Cflags: -I"${includedir}" | @@ -22,4 +22,4 @@ Cflags: -I"${includedir}" | ||
| 22 | # Note: -lcargs is required only for the following file | 22 | # Note: -lcargs is required only for the following file |
| 23 | # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c | 23 | # https://github.com/k2-fsa/sherpa-onnx/blob/master/c-api-examples/decode-file-c-api.c |
| 24 | # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c | 24 | # We add it here so that users don't need to specify -lcargs when compiling decode-file-c-api.c |
| 25 | -Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ | 25 | +Libs: -L"${libdir}" -lsherpa-onnx-c-api -lsherpa-onnx-core -lkaldi-decoder-core -lsherpa-onnx-kaldifst-core -lsherpa-onnx-fstfar -lsherpa-onnx-fst -lkaldi-native-fbank-core -lkissfft-float -lpiper_phonemize -lespeak-ng -lucd -lonnxruntime -lssentencepiece_core -Wl,-rpath,${libdir} @SHERPA_ONNX_PKG_WITH_CARGS@ @SHERPA_ONNX_PKG_CONFIG_EXTRA_LIBS@ |
| @@ -18,9 +18,7 @@ def create_fbank(): | @@ -18,9 +18,7 @@ def create_fbank(): | ||
| 18 | opts.frame_opts.preemph_coeff = 0 | 18 | opts.frame_opts.preemph_coeff = 0 |
| 19 | opts.frame_opts.window_type = "hann" | 19 | opts.frame_opts.window_type = "hann" |
| 20 | 20 | ||
| 21 | - # Even though GigaAM uses 400 for fft, here we use 512 | ||
| 22 | - # since kaldi-native-fbank only supports fft for power of 2. | ||
| 23 | - opts.frame_opts.round_to_power_of_two = True | 21 | + opts.frame_opts.round_to_power_of_two = False |
| 24 | 22 | ||
| 25 | opts.mel_opts.low_freq = 0 | 23 | opts.mel_opts.low_freq = 0 |
| 26 | opts.mel_opts.high_freq = 8000 | 24 | opts.mel_opts.high_freq = 8000 |
| @@ -19,9 +19,7 @@ def create_fbank(): | @@ -19,9 +19,7 @@ def create_fbank(): | ||
| 19 | opts.frame_opts.preemph_coeff = 0 | 19 | opts.frame_opts.preemph_coeff = 0 |
| 20 | opts.frame_opts.window_type = "hann" | 20 | opts.frame_opts.window_type = "hann" |
| 21 | 21 | ||
| 22 | - # Even though GigaAM uses 400 for fft, here we use 512 | ||
| 23 | - # since kaldi-native-fbank only supports fft for power of 2. | ||
| 24 | - opts.frame_opts.round_to_power_of_two = True | 22 | + opts.frame_opts.round_to_power_of_two = False |
| 25 | 23 | ||
| 26 | opts.mel_opts.low_freq = 0 | 24 | opts.mel_opts.low_freq = 0 |
| 27 | opts.mel_opts.high_freq = 8000 | 25 | opts.mel_opts.high_freq = 8000 |
| @@ -197,6 +197,7 @@ class FeatureExtractor::Impl { | @@ -197,6 +197,7 @@ class FeatureExtractor::Impl { | ||
| 197 | opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset; | 197 | opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset; |
| 198 | opts_.frame_opts.preemph_coeff = config_.preemph_coeff; | 198 | opts_.frame_opts.preemph_coeff = config_.preemph_coeff; |
| 199 | opts_.frame_opts.window_type = config_.window_type; | 199 | opts_.frame_opts.window_type = config_.window_type; |
| 200 | + opts_.frame_opts.round_to_power_of_two = config_.round_to_power_of_two; | ||
| 200 | 201 | ||
| 201 | opts_.mel_opts.num_bins = config_.feature_dim; | 202 | opts_.mel_opts.num_bins = config_.feature_dim; |
| 202 | 203 | ||
| @@ -216,6 +217,7 @@ class FeatureExtractor::Impl { | @@ -216,6 +217,7 @@ class FeatureExtractor::Impl { | ||
| 216 | mfcc_opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset; | 217 | mfcc_opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset; |
| 217 | mfcc_opts_.frame_opts.preemph_coeff = config_.preemph_coeff; | 218 | mfcc_opts_.frame_opts.preemph_coeff = config_.preemph_coeff; |
| 218 | mfcc_opts_.frame_opts.window_type = config_.window_type; | 219 | mfcc_opts_.frame_opts.window_type = config_.window_type; |
| 220 | + mfcc_opts_.frame_opts.round_to_power_of_two = config_.round_to_power_of_two; | ||
| 219 | 221 | ||
| 220 | mfcc_opts_.mel_opts.num_bins = config_.feature_dim; | 222 | mfcc_opts_.mel_opts.num_bins = config_.feature_dim; |
| 221 | 223 |
| @@ -79,6 +79,8 @@ struct FeatureExtractorConfig { | @@ -79,6 +79,8 @@ struct FeatureExtractorConfig { | ||
| 79 | 79 | ||
| 80 | bool is_mfcc = false; | 80 | bool is_mfcc = false; |
| 81 | 81 | ||
| 82 | + bool round_to_power_of_two = true; | ||
| 83 | + | ||
| 82 | std::string ToString() const; | 84 | std::string ToString() const; |
| 83 | 85 | ||
| 84 | void Register(ParseOptions *po); | 86 | void Register(ParseOptions *po); |
| @@ -109,6 +109,12 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl { | @@ -109,6 +109,12 @@ class OfflineRecognizerCtcImpl : public OfflineRecognizerImpl { | ||
| 109 | config_.feat_config.preemph_coeff = 0; | 109 | config_.feat_config.preemph_coeff = 0; |
| 110 | config_.feat_config.window_type = "hann"; | 110 | config_.feat_config.window_type = "hann"; |
| 111 | config_.feat_config.feature_dim = 64; | 111 | config_.feat_config.feature_dim = 64; |
| 112 | + | ||
| 113 | + // see | ||
| 114 | + // https://github.com/salute-developers/GigaAM/blob/main/gigaam/preprocess.py#L68 | ||
| 115 | + // | ||
| 116 | + // GigaAM uses n_fft 400 | ||
| 117 | + config_.feat_config.round_to_power_of_two = false; | ||
| 112 | } else { | 118 | } else { |
| 113 | config_.feat_config.low_freq = 0; | 119 | config_.feat_config.low_freq = 0; |
| 114 | config_.feat_config.high_freq = 0; | 120 | config_.feat_config.high_freq = 0; |
| @@ -156,6 +156,12 @@ class OfflineRecognizerTransducerNeMoImpl : public OfflineRecognizerImpl { | @@ -156,6 +156,12 @@ class OfflineRecognizerTransducerNeMoImpl : public OfflineRecognizerImpl { | ||
| 156 | config_.feat_config.preemph_coeff = 0; | 156 | config_.feat_config.preemph_coeff = 0; |
| 157 | config_.feat_config.window_type = "hann"; | 157 | config_.feat_config.window_type = "hann"; |
| 158 | config_.feat_config.feature_dim = 64; | 158 | config_.feat_config.feature_dim = 64; |
| 159 | + | ||
| 160 | + // see | ||
| 161 | + // https://github.com/salute-developers/GigaAM/blob/main/gigaam/preprocess.py#L68 | ||
| 162 | + // | ||
| 163 | + // GigaAM uses n_fft 400 | ||
| 164 | + config_.feat_config.round_to_power_of_two = false; | ||
| 159 | } else { | 165 | } else { |
| 160 | config_.feat_config.low_freq = 0; | 166 | config_.feat_config.low_freq = 0; |
| 161 | // config_.feat_config.high_freq = 8000; | 167 | // config_.feat_config.high_freq = 8000; |
-
请 注册 或 登录 后发表评论