Fangjun Kuang
Committed by GitHub

Resize circular buffer on overflow (#422)

@@ -19,13 +19,83 @@ CircularBuffer::CircularBuffer(int32_t capacity) { @@ -19,13 +19,83 @@ CircularBuffer::CircularBuffer(int32_t capacity) {
19 buffer_.resize(capacity); 19 buffer_.resize(capacity);
20 } 20 }
21 21
  22 +void CircularBuffer::Resize(int32_t new_capacity) {
  23 + int32_t capacity = buffer_.size();
  24 + if (new_capacity <= capacity) {
  25 + SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.",
  26 + new_capacity, capacity);
  27 + return;
  28 + }
  29 +
  30 + int32_t size = Size();
  31 + if (size == 0) {
  32 + buffer_.resize(new_capacity);
  33 + return;
  34 + }
  35 +
  36 + std::vector<float> new_buffer(new_capacity);
  37 + int32_t start = head_ % capacity;
  38 + int32_t dest = head_ % new_capacity;
  39 +
  40 + if (start + size <= capacity) {
  41 + if (dest + size <= new_capacity) {
  42 + std::copy(buffer_.begin() + start, buffer_.begin() + start + size,
  43 + new_buffer.begin() + dest);
  44 + } else {
  45 + int32_t part1_size = new_capacity - dest;
  46 +
  47 + // copy [start, start+part1_size] to new_buffer
  48 + std::copy(buffer_.begin() + start, buffer_.begin() + start + part1_size,
  49 + new_buffer.begin() + dest);
  50 +
  51 + // copy [start+part1_size, start+size] to new_buffer
  52 + std::copy(buffer_.begin() + start + part1_size,
  53 + buffer_.begin() + start + size, new_buffer.begin());
  54 + }
  55 + } else {
  56 + int32_t part1_size = capacity - start;
  57 + int32_t part2_size = size - part1_size;
  58 +
  59 + // copy [start, start+part1_size] to new_buffer
  60 + if (dest + part1_size <= new_capacity) {
  61 + std::copy(buffer_.begin() + start, buffer_.begin() + start + part1_size,
  62 + new_buffer.begin() + dest);
  63 + } else {
  64 + int32_t first_part = new_capacity - dest;
  65 + int32_t second_part = part1_size - first_part;
  66 + std::copy(buffer_.begin() + start, buffer_.begin() + start + first_part,
  67 + new_buffer.begin() + dest);
  68 +
  69 + std::copy(buffer_.begin() + start + first_part,
  70 + buffer_.begin() + start + part1_size, new_buffer.begin());
  71 + }
  72 +
  73 + int32_t new_dest = (dest + part1_size) % new_capacity;
  74 +
  75 + if (new_dest + part2_size <= new_capacity) {
  76 + std::copy(buffer_.begin(), buffer_.begin() + part2_size,
  77 + new_buffer.begin() + new_dest);
  78 + } else {
  79 + int32_t first_part = new_capacity - new_dest;
  80 + std::copy(buffer_.begin(), buffer_.begin() + first_part,
  81 + new_buffer.begin() + new_dest);
  82 + std::copy(buffer_.begin() + first_part, buffer_.begin() + part2_size,
  83 + new_buffer.begin());
  84 + }
  85 + }
  86 + buffer_.swap(new_buffer);
  87 +}
  88 +
22 void CircularBuffer::Push(const float *p, int32_t n) { 89 void CircularBuffer::Push(const float *p, int32_t n) {
23 int32_t capacity = buffer_.size(); 90 int32_t capacity = buffer_.size();
24 int32_t size = Size(); 91 int32_t size = Size();
25 if (n + size > capacity) { 92 if (n + size > capacity) {
26 - SHERPA_ONNX_LOGE("Overflow! n: %d, size: %d, n+size: %d, capacity: %d", n,  
27 - size, n + size, capacity);  
28 - exit(-1); 93 + int32_t new_capacity = std::max(capacity * 2, n + size);
  94 + SHERPA_ONNX_LOGE(
  95 + "Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase "
  96 + "capacity to: %d",
  97 + n, size, n + size, capacity, new_capacity);
  98 + Resize(new_capacity);
29 } 99 }
30 100
31 int32_t start = tail_ % capacity; 101 int32_t start = tail_ % capacity;
@@ -47,6 +47,8 @@ class CircularBuffer { @@ -47,6 +47,8 @@ class CircularBuffer {
47 tail_ = 0; 47 tail_ = 0;
48 } 48 }
49 49
  50 + void Resize(int32_t new_capacity);
  51 +
50 private: 52 private:
51 std::vector<float> buffer_; 53 std::vector<float> buffer_;
52 54
@@ -17,7 +17,8 @@ @@ -17,7 +17,8 @@
17 #include "android/asset_manager_jni.h" 17 #include "android/asset_manager_jni.h"
18 #endif 18 #endif
19 19
20 -#include <regex> 20 +#include <memory>
  21 +#include <regex> // NOLINT
21 22
22 #include "sherpa-onnx/csrc/macros.h" 23 #include "sherpa-onnx/csrc/macros.h"
23 #include "sherpa-onnx/csrc/onnx-utils.h" 24 #include "sherpa-onnx/csrc/onnx-utils.h"
@@ -6,8 +6,8 @@ @@ -6,8 +6,8 @@
6 #define SHERPA_ONNX_CSRC_LEXICON_H_ 6 #define SHERPA_ONNX_CSRC_LEXICON_H_
7 7
8 #include <cstdint> 8 #include <cstdint>
9 -#include <iostream>  
10 -#include <regex> 9 +#include <memory>
  10 +#include <regex> // NOLINT
11 #include <string> 11 #include <string>
12 #include <unordered_map> 12 #include <unordered_map>
13 #include <unordered_set> 13 #include <unordered_set>
@@ -4,6 +4,7 @@ @@ -4,6 +4,7 @@
4 4
5 #include "sherpa-onnx/csrc/voice-activity-detector.h" 5 #include "sherpa-onnx/csrc/voice-activity-detector.h"
6 6
  7 +#include <algorithm>
7 #include <queue> 8 #include <queue>
8 #include <utility> 9 #include <utility>
9 10
@@ -30,7 +31,7 @@ class VoiceActivityDetector::Impl { @@ -30,7 +31,7 @@ class VoiceActivityDetector::Impl {
30 void AcceptWaveform(const float *samples, int32_t n) { 31 void AcceptWaveform(const float *samples, int32_t n) {
31 int32_t window_size = model_->WindowSize(); 32 int32_t window_size = model_->WindowSize();
32 33
33 - // note n is usally window_size and there is no need to use 34 + // note n is usually window_size and there is no need to use
34 // an extra buffer here 35 // an extra buffer here
35 last_.insert(last_.end(), samples, samples + n); 36 last_.insert(last_.end(), samples, samples + n);
36 int32_t k = static_cast<int32_t>(last_.size()) / window_size; 37 int32_t k = static_cast<int32_t>(last_.size()) / window_size;
@@ -39,7 +40,7 @@ class VoiceActivityDetector::Impl { @@ -39,7 +40,7 @@ class VoiceActivityDetector::Impl {
39 40
40 for (int32_t i = 0; i != k; ++i, p += window_size) { 41 for (int32_t i = 0; i != k; ++i, p += window_size) {
41 buffer_.Push(p, window_size); 42 buffer_.Push(p, window_size);
42 - is_speech = model_->IsSpeech(p, window_size); 43 + is_speech = is_speech || model_->IsSpeech(p, window_size);
43 } 44 }
44 45
45 last_ = std::vector<float>( 46 last_ = std::vector<float>(
@@ -48,8 +49,9 @@ class VoiceActivityDetector::Impl { @@ -48,8 +49,9 @@ class VoiceActivityDetector::Impl {
48 if (is_speech) { 49 if (is_speech) {
49 if (start_ == -1) { 50 if (start_ == -1) {
50 // beginning of speech 51 // beginning of speech
51 - start_ = buffer_.Tail() - 2 * model_->WindowSize() -  
52 - model_->MinSpeechDurationSamples(); 52 + start_ = std::max(buffer_.Tail() - 2 * model_->WindowSize() -
  53 + model_->MinSpeechDurationSamples(),
  54 + buffer_.Head());
53 } 55 }
54 } else { 56 } else {
55 // non-speech 57 // non-speech
@@ -68,6 +70,15 @@ class VoiceActivityDetector::Impl { @@ -68,6 +70,15 @@ class VoiceActivityDetector::Impl {
68 buffer_.Pop(end - buffer_.Head()); 70 buffer_.Pop(end - buffer_.Head());
69 } 71 }
70 72
  73 + if (start_ == -1) {
  74 + int32_t end = buffer_.Tail() - 2 * model_->WindowSize() -
  75 + model_->MinSpeechDurationSamples();
  76 + int32_t n = std::max(0, end - buffer_.Head());
  77 + if (n > 0) {
  78 + buffer_.Pop(n);
  79 + }
  80 + }
  81 +
71 start_ = -1; 82 start_ = -1;
72 } 83 }
73 } 84 }