Resize circular buffer on overflow (#422)

Fangjun Kuang · GitHub
Commit 097d641869611ac23a6c45b7138562428e034a6b 097d6418 1 parent 9884cf71
sherpa-onnx/csrc/circular-buffer.cc
sherpa-onnx/csrc/circular-buffer.h
sherpa-onnx/csrc/lexicon.cc
sherpa-onnx/csrc/lexicon.h
sherpa-onnx/csrc/voice-activity-detector.cc
--- a/sherpa-onnx/csrc/circular-buffer.cc
查看文件 @097d641
+++ b/sherpa-onnx/csrc/circular-buffer.cc
查看文件 @097d641
@@ -19,13 +19,83 @@ CircularBuffer::CircularBuffer(int32_t capacity) {
   buffer_.resize(capacity);
 }
+void CircularBuffer::Resize(int32_t new_capacity) {
+  int32_t capacity = buffer_.size();
+  if (new_capacity <= capacity) {
+    SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.",
+                     new_capacity, capacity);
+    return;
+  }
+
+  int32_t size = Size();
+  if (size == 0) {
+    buffer_.resize(new_capacity);
+    return;
+  }
+
+  std::vector<float> new_buffer(new_capacity);
+  int32_t start = head_ % capacity;
+  int32_t dest = head_ % new_capacity;
+
+  if (start + size <= capacity) {
+    if (dest + size <= new_capacity) {
+      std::copy(buffer_.begin() + start, buffer_.begin() + start + size,
+                new_buffer.begin() + dest);
+    } else {
+      int32_t part1_size = new_capacity - dest;
+
+      // copy [start, start+part1_size] to new_buffer
+      std::copy(buffer_.begin() + start, buffer_.begin() + start + part1_size,
+                new_buffer.begin() + dest);
+
+      // copy [start+part1_size, start+size] to new_buffer
+      std::copy(buffer_.begin() + start + part1_size,
+                buffer_.begin() + start + size, new_buffer.begin());
+    }
+  } else {
+    int32_t part1_size = capacity - start;
+    int32_t part2_size = size - part1_size;
+
+    // copy [start, start+part1_size] to new_buffer
+    if (dest + part1_size <= new_capacity) {
+      std::copy(buffer_.begin() + start, buffer_.begin() + start + part1_size,
+                new_buffer.begin() + dest);
+    } else {
+      int32_t first_part = new_capacity - dest;
+      int32_t second_part = part1_size - first_part;
+      std::copy(buffer_.begin() + start, buffer_.begin() + start + first_part,
+                new_buffer.begin() + dest);
+
+      std::copy(buffer_.begin() + start + first_part,
+                buffer_.begin() + start + part1_size, new_buffer.begin());
+    }
+
+    int32_t new_dest = (dest + part1_size) % new_capacity;
+
+    if (new_dest + part2_size <= new_capacity) {
+      std::copy(buffer_.begin(), buffer_.begin() + part2_size,
+                new_buffer.begin() + new_dest);
+    } else {
+      int32_t first_part = new_capacity - new_dest;
+      std::copy(buffer_.begin(), buffer_.begin() + first_part,
+                new_buffer.begin() + new_dest);
+      std::copy(buffer_.begin() + first_part, buffer_.begin() + part2_size,
+                new_buffer.begin());
+    }
+  }
+  buffer_.swap(new_buffer);
+}
+
 void CircularBuffer::Push(const float *p, int32_t n) {
   int32_t capacity = buffer_.size();
   int32_t size = Size();
   if (n + size > capacity) {
-    SHERPA_ONNX_LOGE("Overflow! n: %d, size: %d, n+size: %d, capacity: %d", n,
-                     size, n + size, capacity);
-    exit(-1);
+    int32_t new_capacity = std::max(capacity * 2, n + size);
+    SHERPA_ONNX_LOGE(
+        "Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase "
+        "capacity to: %d",
+        n, size, n + size, capacity, new_capacity);
+    Resize(new_capacity);
   }
   int32_t start = tail_ % capacity;
--- a/sherpa-onnx/csrc/circular-buffer.h
查看文件 @097d641
+++ b/sherpa-onnx/csrc/circular-buffer.h
查看文件 @097d641
@@ -47,6 +47,8 @@ class CircularBuffer {
     tail_ = 0;
   }
+  void Resize(int32_t new_capacity);
+
  private:
   std::vector<float> buffer_;
--- a/sherpa-onnx/csrc/lexicon.cc
查看文件 @097d641
+++ b/sherpa-onnx/csrc/lexicon.cc
查看文件 @097d641
@@ -17,7 +17,8 @@
 #include "android/asset_manager_jni.h"
 #endif
-#include <regex>
+#include <memory>
+#include <regex>  // NOLINT
 #include "sherpa-onnx/csrc/macros.h"
 #include "sherpa-onnx/csrc/onnx-utils.h"
--- a/sherpa-onnx/csrc/lexicon.h
查看文件 @097d641
+++ b/sherpa-onnx/csrc/lexicon.h
查看文件 @097d641
@@ -6,8 +6,8 @@
 #define SHERPA_ONNX_CSRC_LEXICON_H_
 #include <cstdint>
-#include <iostream>
-#include <regex>
+#include <memory>
+#include <regex>  // NOLINT
 #include <string>
 #include <unordered_map>
 #include <unordered_set>
--- a/sherpa-onnx/csrc/voice-activity-detector.cc
查看文件 @097d641
+++ b/sherpa-onnx/csrc/voice-activity-detector.cc
查看文件 @097d641
@@ -4,6 +4,7 @@
 #include "sherpa-onnx/csrc/voice-activity-detector.h"
+#include <algorithm>
 #include <queue>
 #include <utility>
@@ -30,7 +31,7 @@ class VoiceActivityDetector::Impl {
   void AcceptWaveform(const float *samples, int32_t n) {
     int32_t window_size = model_->WindowSize();
-    // note n is usally window_size and there is no need to use
+    // note n is usually window_size and there is no need to use
     // an extra buffer here
     last_.insert(last_.end(), samples, samples + n);
     int32_t k = static_cast<int32_t>(last_.size()) / window_size;
@@ -39,7 +40,7 @@ class VoiceActivityDetector::Impl {
     for (int32_t i = 0; i != k; ++i, p += window_size) {
       buffer_.Push(p, window_size);
-      is_speech = model_->IsSpeech(p, window_size);
+      is_speech = is_speech || model_->IsSpeech(p, window_size);
     }
     last_ = std::vector<float>(
@@ -48,8 +49,9 @@ class VoiceActivityDetector::Impl {
     if (is_speech) {
       if (start_ == -1) {
         // beginning of speech
-        start_ = buffer_.Tail() - 2 * model_->WindowSize() -
-                 model_->MinSpeechDurationSamples();
+        start_ = std::max(buffer_.Tail() - 2 * model_->WindowSize() -
+                              model_->MinSpeechDurationSamples(),
+                          buffer_.Head());
       }
     } else {
       // non-speech
@@ -68,6 +70,15 @@ class VoiceActivityDetector::Impl {
         buffer_.Pop(end - buffer_.Head());
       }
+      if (start_ == -1) {
+        int32_t end = buffer_.Tail() - 2 * model_->WindowSize() -
+                      model_->MinSpeechDurationSamples();
+        int32_t n = std::max(0, end - buffer_.Head());
+        if (n > 0) {
+          buffer_.Pop(n);
+        }
+      }
+
       start_ = -1;
     }
   }