xuning

Legacy (old-version) audio/video recording implementation — OpenSL ES capture + uncompressed AVI writer.

# cmake_minimum_required must precede project() so policy defaults are
# established before the project is configured.
cmake_minimum_required(VERSION 3.10)
project(yolov8ncnn)

# Prebuilt opencv-mobile SDK bundled in the source tree.
set(OpenCV_DIR ${CMAKE_SOURCE_DIR}/opencv-mobile-4.11.0-android/sdk/native/jni)
find_package(OpenCV REQUIRED core imgproc)

# Prebuilt per-ABI ncnn package (Vulkan-enabled build).
set(ncnn_DIR ${CMAKE_SOURCE_DIR}/ncnn-20250503-android-vulkan/${ANDROID_ABI}/lib/cmake/ncnn)
find_package(ncnn REQUIRED)

add_library(yolov8ncnn SHARED
    yolov8ncnn.cpp
    yolov8.cpp
    yolov8_det.cpp
    yolov8_seg.cpp
    yolov8_pose.cpp
    yolov8_cls.cpp
    yolov8_obb.cpp
    ndkcamera.cpp
    mp4recorder.cpp
    av_writer.cpp
    audio_recorder.cpp)

# PRIVATE: none of these link requirements needs to propagate to consumers
# of the shared library (it is loaded via JNI, not linked against).
target_link_libraries(yolov8ncnn PRIVATE
    ncnn
    ${OpenCV_LIBS}
    camera2ndk
    mediandk
    android
    log
    OpenSLES)
... ...
#include "audio_recorder.h"
#include <android/log.h>
#define TAG "AudioRecorder"
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, TAG, __VA_ARGS__)
// Construct with 44.1 kHz mono defaults. The OpenSL ES engine/recorder are
// created lazily in startRecording(); here only the two capture buffers are
// pre-sized so the first Enqueue always has valid storage.
AudioRecorder::AudioRecorder()
: engine_object_(nullptr)
, engine_(nullptr)
, recorder_object_(nullptr)
, recorder_(nullptr)
, buffer_queue_(nullptr)
, sample_rate_(44100)
, channels_(1)
, buffer_size_(4096)
, recording_(false)
, should_stop_(false)
, using_buffer1_(true) {
audio_buffer1_.resize(buffer_size_);
audio_buffer2_.resize(buffer_size_);
}
// Stop capture (no-op when idle) and release all OpenSL ES objects.
// stopRecording() already calls cleanup(); the extra call is safe because
// cleanup() nulls every handle it destroys, making it idempotent.
AudioRecorder::~AudioRecorder() {
stopRecording();
cleanup();
}
// Create the OpenSL ES engine and audio recorder, register the buffer-queue
// callback, prime both capture buffers, and enter the RECORDING state.
// sampleRate is in Hz (e.g. 44100); channels is 1 (mono) or 2 (stereo).
// Returns false — tearing down any partially-created objects via cleanup()
// — on every OpenSL failure path. Holds recorder_mutex_ throughout.
bool AudioRecorder::startRecording(int sampleRate, int channels) {
std::lock_guard<std::mutex> lock(recorder_mutex_);
if (recording_) {
LOGE("Already recording");
return false;
}
sample_rate_ = sampleRate;
channels_ = channels;
// buffer_size_ is a sample count (~100 ms of audio), not a byte count.
buffer_size_ = sample_rate_ * channels_ / 10; // 100ms buffer
audio_buffer1_.resize(buffer_size_);
audio_buffer2_.resize(buffer_size_);
// Create OpenSL ES engine
SLresult result = slCreateEngine(&engine_object_, 0, nullptr, 0, nullptr, nullptr);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to create OpenSL ES engine: %d", result);
return false;
}
// SL_BOOLEAN_FALSE = realize synchronously.
result = (*engine_object_)->Realize(engine_object_, SL_BOOLEAN_FALSE);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to realize engine: %d", result);
cleanup();
return false;
}
result = (*engine_object_)->GetInterface(engine_object_, SL_IID_ENGINE, &engine_);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to get engine interface: %d", result);
cleanup();
return false;
}
// Configure audio source: the default audio input device (microphone).
SLDataLocator_IODevice loc_dev = {
SL_DATALOCATOR_IODEVICE,
SL_IODEVICE_AUDIOINPUT,
SL_DEFAULTDEVICEID_AUDIOINPUT,
nullptr
};
SLDataSource audioSrc = {&loc_dev, nullptr};
// Configure audio sink: a 2-slot Android simple buffer queue, so one
// buffer can be filled while the other is handed to the callback.
SLDataLocator_AndroidSimpleBufferQueue loc_bq = {
SL_DATALOCATOR_ANDROIDSIMPLEBUFFERQUEUE,
2
};
// 16-bit little-endian PCM; OpenSL ES expects the rate in milliHz.
SLDataFormat_PCM format_pcm = {
SL_DATAFORMAT_PCM,
static_cast<SLuint32>(channels_),
static_cast<SLuint32>(sample_rate_ * 1000), // mHz
SL_PCMSAMPLEFORMAT_FIXED_16,
SL_PCMSAMPLEFORMAT_FIXED_16,
channels_ == 1 ? SL_SPEAKER_FRONT_CENTER : (SL_SPEAKER_FRONT_LEFT | SL_SPEAKER_FRONT_RIGHT),
SL_BYTEORDER_LITTLEENDIAN
};
SLDataSink audioSnk = {&loc_bq, &format_pcm};
// Create the recorder; the buffer-queue interface is requested as REQUIRED
// so creation fails fast if the device cannot provide it.
const SLInterfaceID id[1] = {SL_IID_ANDROIDSIMPLEBUFFERQUEUE};
const SLboolean req[1] = {SL_BOOLEAN_TRUE};
result = (*engine_)->CreateAudioRecorder(engine_, &recorder_object_, &audioSrc, &audioSnk, 1, id, req);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to create audio recorder: %d", result);
cleanup();
return false;
}
result = (*recorder_object_)->Realize(recorder_object_, SL_BOOLEAN_FALSE);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to realize recorder: %d", result);
cleanup();
return false;
}
result = (*recorder_object_)->GetInterface(recorder_object_, SL_IID_RECORD, &recorder_);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to get recorder interface: %d", result);
cleanup();
return false;
}
result = (*recorder_object_)->GetInterface(recorder_object_, SL_IID_ANDROIDSIMPLEBUFFERQUEUE, &buffer_queue_);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to get buffer queue interface: %d", result);
cleanup();
return false;
}
// Register audioCallback with `this` as context; it fires on an OpenSL
// internal thread each time a queued buffer is filled.
result = (*buffer_queue_)->RegisterCallback(buffer_queue_, audioCallback, this);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to register callback: %d", result);
cleanup();
return false;
}
// recording_ must be true before the first callback fires, because
// audioCallback drops data while recording_ is false.
recording_ = true;
should_stop_ = false;
// Prime both buffers (Enqueue sizes are in bytes). buffer1 is enqueued
// first, matching using_buffer1_'s initial value of true.
(*buffer_queue_)->Enqueue(buffer_queue_, audio_buffer1_.data(), audio_buffer1_.size() * sizeof(short));
(*buffer_queue_)->Enqueue(buffer_queue_, audio_buffer2_.data(), audio_buffer2_.size() * sizeof(short));
result = (*recorder_)->SetRecordState(recorder_, SL_RECORDSTATE_RECORDING);
if (result != SL_RESULT_SUCCESS) {
LOGE("Failed to start recording: %d", result);
recording_ = false;
cleanup();
return false;
}
LOGI("Audio recording started: %dHz, %d channels", sample_rate_, channels_);
return true;
}
// Stop capture and destroy all OpenSL ES objects. Safe to call when not
// recording (returns true immediately). Holds recorder_mutex_ for the whole
// teardown so it cannot race with startRecording().
bool AudioRecorder::stopRecording() {
std::lock_guard<std::mutex> lock(recorder_mutex_);
if (!recording_) {
return true;
}
// Clear recording_ first so any in-flight audioCallback becomes a no-op.
should_stop_ = true;
recording_ = false;
if (recorder_) {
(*recorder_)->SetRecordState(recorder_, SL_RECORDSTATE_STOPPED);
}
// Discard any buffers still queued for capture.
if (buffer_queue_) {
(*buffer_queue_)->Clear(buffer_queue_);
}
cleanup();
// Wake consumers blocked in getAudioData() so they can observe the stop.
queue_cv_.notify_all();
LOGI("Audio recording stopped");
return true;
}
// Static buffer-queue callback, invoked on an OpenSL-internal thread every
// time a queued capture buffer has been filled. `context` is the
// AudioRecorder* registered in startRecording().
void AudioRecorder::audioCallback(SLAndroidSimpleBufferQueueItf bq, void* context) {
AudioRecorder* recorder = static_cast<AudioRecorder*>(context);
if (recorder && recorder->recording_) {
// Determine which buffer was just filled.
// NOTE(review): this relies on callbacks strictly alternating in the
// same order the buffers were enqueued (buffer1 first) — confirm
// against the OpenSL ES buffer-queue contract.
std::vector<short>* current_buffer = recorder->using_buffer1_ ?
&recorder->audio_buffer1_ : &recorder->audio_buffer2_;
// Copy the captured samples into the consumer queue.
recorder->processAudioData(current_buffer->data(), current_buffer->size());
// Flip the double-buffer bookkeeping for the next callback.
recorder->using_buffer1_ = !recorder->using_buffer1_;
// Re-enqueue this buffer so capture continues (size is in bytes).
(*bq)->Enqueue(bq, current_buffer->data(), current_buffer->size() * sizeof(short));
}
}
// Copy one callback's worth of captured samples into the outgoing queue and
// wake one consumer. The queue is capped at 100 chunks: when the consumer
// falls behind, the oldest audio is dropped so memory stays bounded.
void AudioRecorder::processAudioData(const short* data, size_t size) {
    if (should_stop_ || !recording_) {
        return;
    }
    std::vector<short> chunk(data, data + size);
    {
        std::lock_guard<std::mutex> guard(queue_mutex_);
        audio_queue_.push(std::move(chunk));
        // Bound the backlog: discard the oldest chunks beyond 100 entries.
        while (audio_queue_.size() > 100) {
            audio_queue_.pop();
        }
    }
    queue_cv_.notify_one();
}
// Pop one chunk of captured audio into `audioData`.
// While recording, blocks for up to 100 ms waiting for the capture callback
// to deliver data. Returns false when nothing is available — either the
// wait timed out, or recording has stopped and the queue is drained.
bool AudioRecorder::getAudioData(std::vector<short>& audioData) {
    std::unique_lock<std::mutex> lock(queue_mutex_);
    if (audio_queue_.empty()) {
        if (!recording_) {
            // Stopped and fully drained: nothing more will ever arrive.
            return false;
        }
        // Live but momentarily empty: give the callback a short window.
        queue_cv_.wait_for(lock, std::chrono::milliseconds(100));
    }
    if (audio_queue_.empty()) {
        return false;
    }
    audioData = std::move(audio_queue_.front());
    audio_queue_.pop();
    return true;
}
// Release all OpenSL ES objects. Destroy order matters: the recorder
// (child) must be destroyed before the engine (parent). Interface handles
// obtained from a destroyed object become invalid, so they are nulled with
// it. Every branch nulls its handle, making repeated calls safe.
void AudioRecorder::cleanup() {
if (buffer_queue_) {
(*buffer_queue_)->Clear(buffer_queue_);
buffer_queue_ = nullptr;
}
if (recorder_object_) {
(*recorder_object_)->Destroy(recorder_object_);
recorder_object_ = nullptr;
recorder_ = nullptr;
}
if (engine_object_) {
(*engine_object_)->Destroy(engine_object_);
engine_object_ = nullptr;
engine_ = nullptr;
}
}
\ No newline at end of file
... ...
#ifndef AUDIO_RECORDER_H
#define AUDIO_RECORDER_H
#include <string>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <queue>
#include <vector>
#include <atomic>
#include <SLES/OpenSLES.h>
#include <SLES/OpenSLES_Android.h>
// Microphone capture via OpenSL ES using a two-buffer Android simple buffer
// queue. Captured 16-bit PCM chunks are staged in an internal bounded queue
// and drained by a consumer thread through getAudioData().
class AudioRecorder {
public:
AudioRecorder();
~AudioRecorder();
// Create the OpenSL ES engine/recorder and begin capture; false on failure.
bool startRecording(int sampleRate = 44100, int channels = 1);
// Stop capture and destroy OpenSL objects; safe when already stopped.
bool stopRecording();
bool isRecording() const { return recording_; }
// Pop one captured PCM chunk for encoding; blocks up to 100 ms while live.
bool getAudioData(std::vector<short>& audioData);
// Audio parameters (valid after startRecording()).
int getSampleRate() const { return sample_rate_; }
int getChannels() const { return channels_; }
private:
// OpenSL buffer-queue callback; `context` is the AudioRecorder*.
static void audioCallback(SLAndroidSimpleBufferQueueItf bq, void* context);
// Copy `size` samples into audio_queue_ (dropping oldest past 100 chunks).
void processAudioData(const short* data, size_t size);
// Destroy recorder then engine; idempotent.
void cleanup();
// OpenSL ES objects (owned; null when not recording)
SLObjectItf engine_object_;
SLEngineItf engine_;
SLObjectItf recorder_object_;
SLRecordItf recorder_;
SLAndroidSimpleBufferQueueItf buffer_queue_;
// Audio parameters
int sample_rate_;
int channels_;
int buffer_size_;   // samples per capture buffer (~100 ms)
// Recording state
std::atomic<bool> recording_;
std::atomic<bool> should_stop_;
// Double-buffered capture storage handed to the OpenSL buffer queue
std::vector<short> audio_buffer1_;
std::vector<short> audio_buffer2_;
bool using_buffer1_;   // which buffer the next callback corresponds to
// Captured-chunk queue shared between the callback and getAudioData()
std::queue<std::vector<short>> audio_queue_;
std::mutex queue_mutex_;
std::condition_variable queue_cv_;
// Serializes startRecording()/stopRecording()
std::mutex recorder_mutex_;
};
#endif // AUDIO_RECORDER_H
\ No newline at end of file
... ...
#include "av_writer.h"
#include <android/log.h>
#include <cstring>
#define LOG_TAG "AVWriter"
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
// AVI format constants: little-endian FOURCC codes used by the RIFF/AVI
// container.
// NOTE(review): these constants appear unused in this translation unit —
// writeAVIHeader()/write*Chunk() emit the codes as string literals instead.
// Consider using them (or removing them) to keep one source of truth.
#define FOURCC(a,b,c,d) ((uint32_t)(a) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))
static const uint32_t RIFF_FOURCC = FOURCC('R','I','F','F');
static const uint32_t AVI_FOURCC = FOURCC('A','V','I',' ');
static const uint32_t LIST_FOURCC = FOURCC('L','I','S','T');
static const uint32_t HDRL_FOURCC = FOURCC('h','d','r','l');
static const uint32_t AVIH_FOURCC = FOURCC('a','v','i','h');
static const uint32_t STRL_FOURCC = FOURCC('s','t','r','l');
static const uint32_t STRH_FOURCC = FOURCC('s','t','r','h');
static const uint32_t STRF_FOURCC = FOURCC('s','t','r','f');
static const uint32_t MOVI_FOURCC = FOURCC('m','o','v','i');
static const uint32_t VIDS_FOURCC = FOURCC('v','i','d','s');
static const uint32_t AUDS_FOURCC = FOURCC('a','u','d','s');
static const uint32_t DIB_FOURCC = FOURCC('D','I','B',' ');
static const uint32_t PCM_FOURCC = FOURCC('P','C','M',' ');
static const uint32_t DC00_FOURCC = FOURCC('0','0','d','c');
static const uint32_t WB01_FOURCC = FOURCC('0','1','w','b');
// All counters/sizes start at zero; the real parameters are supplied by
// open(), which also resets these fields for re-use of the same instance.
AVWriter::AVWriter()
: is_open_(false)
, width_(0)
, height_(0)
, fps_(30)
, video_frame_count_(0)
, audio_enabled_(false)
, sample_rate_(44100)
, channels_(1)
, audio_sample_count_(0)
, total_video_size_(0)
, total_audio_size_(0)
{
}
// Patch headers and flush the file if the caller forgot to close()
// explicitly; close() is a no-op when already closed.
AVWriter::~AVWriter() {
close();
}
// Open (or re-open) the output container and write the AVI header skeleton.
// The extension of `filename` is replaced with ".avi" (or appended when the
// name has no extension) because this writer emits an uncompressed AVI.
// Returns false on invalid parameters or if the file cannot be created.
bool AVWriter::open(const std::string& filename, int width, int height, int fps,
bool enableAudio, int sampleRate, int channels) {
    if (is_open_) {
        close();
    }
    // Reject parameters that would corrupt the header: fps is a divisor in
    // writeAVIHeader() (1000000 / fps_) and width/height size every chunk.
    if (width <= 0 || height <= 0 || fps <= 0) {
        LOGE("Invalid video parameters: %dx%d @ %dfps", width, height, fps);
        return false;
    }
    if (enableAudio && (sampleRate <= 0 || channels <= 0)) {
        LOGE("Invalid audio parameters: %dHz, %d channels", sampleRate, channels);
        return false;
    }
    filename_ = filename;
    width_ = width;
    height_ = height;
    fps_ = fps;
    audio_enabled_ = enableAudio;
    sample_rate_ = sampleRate;
    channels_ = channels;
    video_frame_count_ = 0;
    audio_sample_count_ = 0;
    total_video_size_ = 0;
    total_audio_size_ = 0;
    // Force an .avi extension; previously a name without any '.' kept its
    // original extension-less form, contradicting the stated intent.
    std::string avi_filename = filename;
    size_t pos = avi_filename.find_last_of('.');
    if (pos != std::string::npos) {
        avi_filename = avi_filename.substr(0, pos) + ".avi";
    } else {
        avi_filename += ".avi";
    }
    file_.open(avi_filename, std::ios::binary);
    if (!file_.is_open()) {
        LOGE("Failed to open file: %s", avi_filename.c_str());
        return false;
    }
    // Emit the header with zero-filled size/count fields; updateHeaders()
    // patches them when the file is closed.
    writeAVIHeader();
    is_open_ = true;
    LOGI("Opened AV file: %s (%dx%d @ %dfps) Audio: %s",
         avi_filename.c_str(), width, height, fps,
         audio_enabled_ ? "ON" : "OFF");
    return true;
}
// Append one video frame to the movi list.
// Returns false when the writer is closed or the frame is empty; on success
// bumps the frame counter that updateHeaders() later writes back.
bool AVWriter::writeVideoFrame(const cv::Mat& frame) {
    if (frame.empty() || !is_open_) {
        return false;
    }
    writeVideoFrameInternal(frame);
    ++video_frame_count_;
    return true;
}
// Append one chunk of 16-bit PCM samples to the movi list.
// Returns false when the writer is closed, audio is disabled, or the chunk
// is empty; on success accumulates the sample count for updateHeaders().
bool AVWriter::writeAudioData(const std::vector<short>& audioData) {
    const bool writable = is_open_ && audio_enabled_ && !audioData.empty();
    if (!writable) {
        return false;
    }
    writeAudioChunk(audioData);
    audio_sample_count_ += audioData.size();
    return true;
}
// Write the RIFF/AVI header skeleton: RIFF -> LIST hdrl (avih + one strl
// per stream) -> LIST movi. Size and count fields that are unknown while
// streaming are written as 0 and their file positions saved in the
// *_pos_ members so updateHeaders() can patch them on close().
// All multi-byte fields are written in the host's little-endian layout via
// raw byte copies, matching the RIFF on-disk format.
void AVWriter::writeAVIHeader() {
// Bytes per uncompressed RGB24 frame.
int frame_size = width_ * height_ * 3; // RGB24
// NOTE(review): divides by fps_ — callers must guarantee fps_ > 0.
int microsec_per_frame = 1000000 / fps_;
// RIFF header
file_.write("RIFF", 4);
file_size_pos_ = file_.tellp();
uint32_t file_size = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&file_size), 4);
file_.write("AVI ", 4);
// LIST hdrl
file_.write("LIST", 4);
// NOTE(review): 308/244 are hand-computed "approximate" hdrl sizes — they
// are never patched later; verify they match the bytes actually emitted
// below, since strict parsers reject inconsistent LIST sizes.
uint32_t hdrl_size = audio_enabled_ ? 308 : 244; // Approximate size
file_.write(reinterpret_cast<const char*>(&hdrl_size), 4);
file_.write("hdrl", 4);
// avih (main AVI header)
file_.write("avih", 4);
uint32_t avih_size = 56;
file_.write(reinterpret_cast<const char*>(&avih_size), 4);
uint32_t microsec_per_frame_val = microsec_per_frame;
file_.write(reinterpret_cast<const char*>(&microsec_per_frame_val), 4);
uint32_t max_bytes_per_sec = frame_size * fps_;
file_.write(reinterpret_cast<const char*>(&max_bytes_per_sec), 4);
uint32_t padding_granularity = 0;
file_.write(reinterpret_cast<const char*>(&padding_granularity), 4);
// NOTE(review): AVIF_HASINDEX is advertised but no idx1 index chunk is
// written anywhere in this file — players that trust the flag may seek
// incorrectly; either write an index or drop the flag.
uint32_t flags = 0x10; // AVIF_HASINDEX
file_.write(reinterpret_cast<const char*>(&flags), 4);
video_frames_pos_ = file_.tellp();
uint32_t total_frames = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&total_frames), 4);
uint32_t initial_frames = 0;
file_.write(reinterpret_cast<const char*>(&initial_frames), 4);
uint32_t streams = audio_enabled_ ? 2 : 1;
file_.write(reinterpret_cast<const char*>(&streams), 4);
uint32_t suggested_buffer_size = frame_size;
file_.write(reinterpret_cast<const char*>(&suggested_buffer_size), 4);
uint32_t width = width_;
file_.write(reinterpret_cast<const char*>(&width), 4);
uint32_t height = height_;
file_.write(reinterpret_cast<const char*>(&height), 4);
uint32_t reserved[4] = {0, 0, 0, 0};
file_.write(reinterpret_cast<const char*>(reserved), 16);
// Video stream header list (stream 0)
file_.write("LIST", 4);
uint32_t strl_size = 116;
file_.write(reinterpret_cast<const char*>(&strl_size), 4);
file_.write("strl", 4);
// strh (stream header)
file_.write("strh", 4);
uint32_t strh_size = 56;
file_.write(reinterpret_cast<const char*>(&strh_size), 4);
file_.write("vids", 4); // fccType
file_.write("DIB ", 4); // fccHandler: uncompressed device-independent bitmap
uint32_t stream_flags = 0;
file_.write(reinterpret_cast<const char*>(&stream_flags), 4);
uint16_t priority = 0;
file_.write(reinterpret_cast<const char*>(&priority), 2);
uint16_t language = 0;
file_.write(reinterpret_cast<const char*>(&language), 2);
uint32_t initial_frames_stream = 0;
file_.write(reinterpret_cast<const char*>(&initial_frames_stream), 4);
// rate/scale = fps/1 frames per second.
uint32_t scale = 1;
file_.write(reinterpret_cast<const char*>(&scale), 4);
uint32_t rate = fps_;
file_.write(reinterpret_cast<const char*>(&rate), 4);
uint32_t start = 0;
file_.write(reinterpret_cast<const char*>(&start), 4);
video_length_pos_ = file_.tellp(); // Save position to update later
uint32_t length = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&length), 4);
uint32_t suggested_buffer_size_stream = frame_size;
file_.write(reinterpret_cast<const char*>(&suggested_buffer_size_stream), 4);
uint32_t quality = 0;
file_.write(reinterpret_cast<const char*>(&quality), 4);
uint32_t sample_size = 0;
file_.write(reinterpret_cast<const char*>(&sample_size), 4);
// rcFrame destination rectangle.
uint16_t left = 0, top = 0, right = width_, bottom = height_;
file_.write(reinterpret_cast<const char*>(&left), 2);
file_.write(reinterpret_cast<const char*>(&top), 2);
file_.write(reinterpret_cast<const char*>(&right), 2);
file_.write(reinterpret_cast<const char*>(&bottom), 2);
// strf (stream format) = BITMAPINFOHEADER
file_.write("strf", 4);
uint32_t strf_size = 40;
file_.write(reinterpret_cast<const char*>(&strf_size), 4);
// BITMAPINFOHEADER
uint32_t bi_size = 40;
file_.write(reinterpret_cast<const char*>(&bi_size), 4);
int32_t bi_width = width_;
file_.write(reinterpret_cast<const char*>(&bi_width), 4);
// Positive biHeight => bottom-up DIB; frames are flipped vertically in
// writeVideoFrameInternal() to match.
int32_t bi_height = height_;
file_.write(reinterpret_cast<const char*>(&bi_height), 4);
uint16_t bi_planes = 1;
file_.write(reinterpret_cast<const char*>(&bi_planes), 2);
uint16_t bi_bit_count = 24;
file_.write(reinterpret_cast<const char*>(&bi_bit_count), 2);
uint32_t bi_compression = 0; // BI_RGB
file_.write(reinterpret_cast<const char*>(&bi_compression), 4);
uint32_t bi_size_image = frame_size;
file_.write(reinterpret_cast<const char*>(&bi_size_image), 4);
int32_t bi_x_pels_per_meter = 0;
file_.write(reinterpret_cast<const char*>(&bi_x_pels_per_meter), 4);
int32_t bi_y_pels_per_meter = 0;
file_.write(reinterpret_cast<const char*>(&bi_y_pels_per_meter), 4);
uint32_t bi_clr_used = 0;
file_.write(reinterpret_cast<const char*>(&bi_clr_used), 4);
uint32_t bi_clr_important = 0;
file_.write(reinterpret_cast<const char*>(&bi_clr_important), 4);
// Audio stream header list (stream 1, only when enabled)
if (audio_enabled_) {
file_.write("LIST", 4);
uint32_t audio_strl_size = 92;
file_.write(reinterpret_cast<const char*>(&audio_strl_size), 4);
file_.write("strl", 4);
// Audio strh
file_.write("strh", 4);
uint32_t audio_strh_size = 56;
file_.write(reinterpret_cast<const char*>(&audio_strh_size), 4);
file_.write("auds", 4); // fccType
uint32_t audio_handler = 0;
file_.write(reinterpret_cast<const char*>(&audio_handler), 4);
uint32_t audio_stream_flags = 0;
file_.write(reinterpret_cast<const char*>(&audio_stream_flags), 4);
uint16_t audio_priority = 0;
file_.write(reinterpret_cast<const char*>(&audio_priority), 2);
uint16_t audio_language = 0;
file_.write(reinterpret_cast<const char*>(&audio_language), 2);
uint32_t audio_initial_frames = 0;
file_.write(reinterpret_cast<const char*>(&audio_initial_frames), 4);
// rate/scale = samples per second.
uint32_t audio_scale = 1;
file_.write(reinterpret_cast<const char*>(&audio_scale), 4);
uint32_t audio_rate = sample_rate_;
file_.write(reinterpret_cast<const char*>(&audio_rate), 4);
uint32_t audio_start = 0;
file_.write(reinterpret_cast<const char*>(&audio_start), 4);
audio_samples_pos_ = file_.tellp();
uint32_t audio_length = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&audio_length), 4);
uint32_t audio_suggested_buffer_size = sample_rate_ * channels_ * 2; // 1 second buffer
file_.write(reinterpret_cast<const char*>(&audio_suggested_buffer_size), 4);
uint32_t audio_quality = 0;
file_.write(reinterpret_cast<const char*>(&audio_quality), 4);
uint32_t audio_sample_size = channels_ * 2; // 16-bit samples
file_.write(reinterpret_cast<const char*>(&audio_sample_size), 4);
uint32_t audio_reserved[2] = {0, 0};
file_.write(reinterpret_cast<const char*>(audio_reserved), 8);
// Audio strf = WAVEFORMATEX (PCM, so no extra cbSize field; 16 bytes)
file_.write("strf", 4);
uint32_t audio_strf_size = 16;
file_.write(reinterpret_cast<const char*>(&audio_strf_size), 4);
uint16_t format_tag = 1; // PCM
file_.write(reinterpret_cast<const char*>(&format_tag), 2);
uint16_t audio_channels = channels_;
file_.write(reinterpret_cast<const char*>(&audio_channels), 2);
uint32_t samples_per_sec = sample_rate_;
file_.write(reinterpret_cast<const char*>(&samples_per_sec), 4);
uint32_t avg_bytes_per_sec = sample_rate_ * channels_ * 2;
file_.write(reinterpret_cast<const char*>(&avg_bytes_per_sec), 4);
uint16_t block_align = channels_ * 2;
file_.write(reinterpret_cast<const char*>(&block_align), 2);
uint16_t bits_per_sample = 16;
file_.write(reinterpret_cast<const char*>(&bits_per_sample), 2);
}
// LIST movi — the actual frame/audio data chunks follow this point.
file_.write("LIST", 4);
movi_size_pos_ = file_.tellp();
uint32_t movi_size = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&movi_size), 4);
file_.write("movi", 4);
movi_list_pos_ = file_.tellp();
}
// Serialize one frame as an uncompressed "00dc" chunk.
// The image is flipped vertically because the header declares a positive
// biHeight (bottom-up DIB convention).
// NOTE(review): the 3-channel path only flips — it does NOT swap channels,
// so the caller must already supply the byte order the file should carry
// (a BI_RGB DIB stream is conventionally BGR). The 4-channel path converts
// RGBA -> BGR. Confirm the upstream channel order end-to-end.
void AVWriter::writeVideoFrameInternal(const cv::Mat& frame) {
if (frame.empty()) return;
cv::Mat bgr_frame;
if (frame.channels() == 3) {
cv::flip(frame, bgr_frame, 0); // Flip vertically
} else if (frame.channels() == 4) {
cv::Mat temp;
cv::cvtColor(frame, temp, cv::COLOR_RGBA2BGR);
cv::flip(temp, bgr_frame, 0);
} else {
LOGE("Unsupported frame format");
return;
}
// Resize if the producer's size drifted from what the header declares.
if (bgr_frame.cols != width_ || bgr_frame.rows != height_) {
cv::resize(bgr_frame, bgr_frame, cv::Size(width_, height_));
}
// Chunk layout: fourcc, payload size, raw pixel bytes.
file_.write("00dc", 4); // Video chunk ID
uint32_t chunk_size = bgr_frame.total() * bgr_frame.elemSize();
file_.write(reinterpret_cast<const char*>(&chunk_size), 4);
file_.write(reinterpret_cast<const char*>(bgr_frame.data), chunk_size);
// RIFF chunks must be padded to even byte boundaries.
if (chunk_size % 2 == 1) {
char pad = 0;
file_.write(&pad, 1);
}
// 8 = fourcc + size field; the pad byte is counted when present.
total_video_size_ += chunk_size + 8 + (chunk_size % 2);
}
// Emit one "01wb" PCM chunk (16-bit samples), padded to an even byte
// boundary as RIFF requires, and add its full on-disk size (header +
// payload + pad) to the movi accounting.
void AVWriter::writeAudioChunk(const std::vector<short>& audioData) {
    if (audioData.empty()) {
        return;
    }
    const uint32_t payload = static_cast<uint32_t>(audioData.size() * sizeof(short));
    file_.write("01wb", 4); // Audio chunk ID
    file_.write(reinterpret_cast<const char*>(&payload), 4);
    file_.write(reinterpret_cast<const char*>(audioData.data()), payload);
    const bool needs_pad = (payload % 2 == 1);
    if (needs_pad) {
        const char zero = 0;
        file_.write(&zero, 1);
    }
    // 8 bytes = fourcc + size field.
    total_audio_size_ += payload + 8 + (needs_pad ? 1 : 0);
}
// Patch the deferred header fields, then close the file.
// Safe to call repeatedly: everything is gated on is_open_.
void AVWriter::close() {
    if (is_open_) {
        finalize();
        file_.close();
        is_open_ = false;
        LOGI("Closed AV file: %s (%d video frames, %d audio samples)",
        filename_.c_str(), video_frame_count_, audio_sample_count_);
    }
}
// Write back the size/count fields that were zero-filled while streaming.
// Split out as a seam in case additional finalization steps are added.
void AVWriter::finalize() {
updateHeaders();
}
// Seek back to every position saved by writeAVIHeader() and patch the
// fields that were written as 0, then restore the write position.
// Must run before the stream is closed.
void AVWriter::updateHeaders() {
std::streampos current_pos = file_.tellp();
// RIFF size = file size minus the 8-byte RIFF chunk header. The field is
// 32-bit, so this (like AVI itself) caps out near 4 GiB.
file_.seekp(file_size_pos_);
uint32_t file_size = static_cast<uint32_t>(current_pos) - 8;
file_.write(reinterpret_cast<const char*>(&file_size), 4);
// avih dwTotalFrames
file_.seekp(video_frames_pos_);
uint32_t total_frames = video_frame_count_;
file_.write(reinterpret_cast<const char*>(&total_frames), 4);
// Video strh dwLength (critical for correct duration calculation)
file_.seekp(video_length_pos_);
uint32_t video_length = video_frame_count_;
file_.write(reinterpret_cast<const char*>(&video_length), 4);
// Audio strh dwLength, only when an audio stream header exists
if (audio_enabled_) {
file_.seekp(audio_samples_pos_);
uint32_t audio_length = audio_sample_count_;
file_.write(reinterpret_cast<const char*>(&audio_length), 4);
}
// movi LIST size covers the "movi" fourcc plus all data chunks
file_.seekp(movi_size_pos_);
uint32_t movi_size = total_video_size_ + total_audio_size_ + 4; // +4 for "movi"
file_.write(reinterpret_cast<const char*>(&movi_size), 4);
// Restore position
file_.seekp(current_pos);
}
\ No newline at end of file
... ...
#ifndef AV_WRITER_H
#define AV_WRITER_H
#include <string>
#include <vector>
#include <fstream>
#include <opencv2/opencv.hpp>
// Minimal streaming writer for an uncompressed AVI container: one RGB24
// video stream ("00dc" chunks) and optionally one 16-bit PCM audio stream
// ("01wb" chunks). Header size/count fields are zero-filled while streaming
// and patched in close() via the saved std::streampos bookmarks below.
class AVWriter {
public:
AVWriter();
~AVWriter();
// Create the file (extension forced to .avi) and write the header skeleton.
bool open(const std::string& filename, int width, int height, int fps,
bool enableAudio = true, int sampleRate = 44100, int channels = 1);
// Append one frame / one PCM chunk; false when closed or input is empty.
bool writeVideoFrame(const cv::Mat& frame);
bool writeAudioData(const std::vector<short>& audioData);
// Patch headers and close; safe to call when already closed.
void close();
private:
void writeAVIHeader();
void writeVideoFrameInternal(const cv::Mat& frame);
void writeAudioChunk(const std::vector<short>& audioData);
void finalize();
void updateHeaders();
// File handling
std::ofstream file_;
std::string filename_;
bool is_open_;
// Video parameters
int width_;
int height_;
int fps_;
int video_frame_count_;
// Audio parameters
bool audio_enabled_;
int sample_rate_;
int channels_;
int audio_sample_count_;   // cumulative samples written (per short)
// Saved header-field positions, patched by updateHeaders()
std::streampos movi_list_pos_;
std::streampos file_size_pos_;
std::streampos movi_size_pos_;
std::streampos video_frames_pos_;
std::streampos video_length_pos_; // Position of video stream length field
std::streampos audio_samples_pos_;
// Data buffers
std::vector<uint8_t> frame_buffer_;   // NOTE(review): appears unused here
size_t total_video_size_;   // movi bytes contributed by video chunks
size_t total_audio_size_;   // movi bytes contributed by audio chunks
};
#endif // AV_WRITER_H
\ No newline at end of file
... ...
#include "mp4recorder.h"
#include <android/log.h>
#include <chrono>
#define LOG_TAG "MP4Recorder"
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
// Construct idle. current_fps_ deliberately starts at 1.0 (worst case) so
// the duplication logic pads the first frames instead of producing a
// fast-forward effect; real values come from updateFrameRate().
MP4Recorder::MP4Recorder()
: recording_(false)
, should_stop_(false)
, audio_enabled_(false)
, frame_width_(0)
, frame_height_(0)
, target_fps_(30)
, current_fps_(1.0f) // Start with low FPS assumption
, frame_count_(0)
, fps_smoothing_factor_(0.3f) // Faster adaptation
, max_recent_frames_(10)
, use_fast_detection_(true)
, has_last_frame_(false)
, min_fps_threshold_(5)
{
recent_frame_times_.reserve(max_recent_frames_);
}
// Join worker threads and finalize the file if a recording is still live;
// stopRecording() is a no-op when already stopped.
MP4Recorder::~MP4Recorder() {
stopRecording();
}
// Begin a recording session: reset FPS-monitoring state, open the AV
// writer (immediately when width/height are known, otherwise deferred to
// setFrameSize() on the first frame), optionally start audio capture, and
// launch the writer thread. Returns false — leaving no half-open writer or
// running threads behind — on any failure.
bool MP4Recorder::startRecording(const std::string& outputPath, int width, int height, int fps, bool enableAudio) {
    if (recording_) {
        LOGE("Already recording");
        return false;
    }
    output_path_ = outputPath;
    frame_width_ = width;
    frame_height_ = height;
    target_fps_ = fps;
    current_fps_ = fps;
    audio_enabled_ = enableAudio;
    should_stop_ = false;
    // Reset frame rate monitoring (conservative 1 fps start; see ctor).
    frame_count_ = 0;
    has_last_frame_ = false;
    use_fast_detection_ = true;
    current_fps_ = 1.0f; // Conservative start
    recent_frame_times_.clear();
    recording_start_time_ = std::chrono::steady_clock::now();
    last_frame_time_ = recording_start_time_;
    // Drop any frames left over from a previous session.
    {
        std::lock_guard<std::mutex> lock(queue_mutex_);
        while (!frame_queue_.empty()) {
            frame_queue_.pop();
        }
    }
    // Open AV writer now only if the frame geometry is already known;
    // otherwise setFrameSize() opens it lazily on the first frame.
    bool writer_opened = false;
    if (width > 0 && height > 0) {
        std::lock_guard<std::mutex> lock(writer_mutex_);
        if (!av_writer_.open(output_path_, width, height, fps, audio_enabled_)) {
            LOGE("Failed to open AV writer");
            return false;
        }
        writer_opened = true;
    }
    // Start audio capture if requested.
    if (audio_enabled_) {
        if (!audio_recorder_.startRecording()) {
            LOGE("Failed to start audio recording");
            // Fix: previously the writer stayed open here, leaking the file
            // handle and leaving a stale zero-header .avi on disk.
            if (writer_opened) {
                std::lock_guard<std::mutex> lock(writer_mutex_);
                av_writer_.close();
            }
            return false;
        }
        // Drains the audio queue into the writer until should_stop_.
        audio_thread_ = std::thread(&MP4Recorder::audioThread, this);
    }
    recording_ = true;
    // Consumes frame_queue_ and writes frames until should_stop_.
    writer_thread_ = std::thread(&MP4Recorder::writerThread, this);
    LOGI("Recording started: %s (%dx%d @ %dfps) Audio: %s",
         output_path_.c_str(), width, height, fps, audio_enabled_ ? "ON" : "OFF");
    return true;
}
// Stop the session in dependency order: halt audio capture and join the
// audio thread first (so no more audio reaches the writer), then wake and
// join the writer thread, and finally close the AV writer, which patches
// the AVI headers. Returns true when already stopped.
bool MP4Recorder::stopRecording() {
if (!recording_) {
return true;
}
// Flags are cleared before joining so both worker loops can exit.
recording_ = false;
should_stop_ = true;
// Stop audio recording
if (audio_enabled_) {
audio_recorder_.stopRecording();
if (audio_thread_.joinable()) {
audio_thread_.join();
}
}
// Wake the writer thread in case it is blocked on an empty queue.
queue_cv_.notify_all();
// Wait for writer thread to finish
if (writer_thread_.joinable()) {
writer_thread_.join();
}
// Close AV writer (safe even if it was never opened: close() checks).
{
std::lock_guard<std::mutex> lock(writer_mutex_);
av_writer_.close();
}
LOGI("Recording stopped: %s", output_path_.c_str());
return true;
}
// Producer entry point: accept one camera/processed frame, lazily size the
// writer on the first frame, update the FPS estimate, and enqueue the frame
// (plus duplicates when the measured FPS is below target).
// NOTE(review): assumed to be called from a single producer thread —
// frame_width_/last_frame_ are accessed without locks. Confirm at callers.
bool MP4Recorder::writeFrame(const cv::Mat& frame) {
if (!recording_) {
return false;
}
// Initialize video writer with actual frame size if not done yet
if (frame_width_ == 0 || frame_height_ == 0) {
setFrameSize(frame.cols, frame.rows);
}
// Update frame rate monitoring
updateFrameRate();
// Normalize to 3 channels.
// NOTE(review): output is RGB here, but AVWriter's 3-channel path writes
// bytes verbatim into a BI_RGB (conventionally BGR) stream — confirm the
// end-to-end channel order is intended.
cv::Mat processed_frame;
if (frame.channels() == 3) {
cv::cvtColor(frame, processed_frame, cv::COLOR_BGR2RGB);
} else if (frame.channels() == 4) {
cv::cvtColor(frame, processed_frame, cv::COLOR_RGBA2RGB);
} else {
LOGE("Unsupported frame format");
return false;
}
// Keep a copy in case future duplication needs it.
// NOTE(review): last_frame_/has_last_frame_ are written but never read in
// this file — writeFrameWithDuplication duplicates `frame` directly.
last_frame_ = processed_frame.clone();
has_last_frame_ = true;
// Enqueue the frame, duplicating it when FPS is too low.
writeFrameWithDuplication(processed_frame);
return true;
}
// Consumer loop: pop frames from frame_queue_ and hand them to the AV
// writer. Blocks on queue_cv_ while the queue is empty; exits once
// should_stop_ is set AND the queue has been drained, so no queued frame
// is lost on shutdown.
void MP4Recorder::writerThread() {
LOGI("Writer thread started");
while (!should_stop_) {
cv::Mat frame;
// Wait for frame or stop signal
{
std::unique_lock<std::mutex> lock(queue_mutex_);
queue_cv_.wait(lock, [this] { return !frame_queue_.empty() || should_stop_; });
if (should_stop_ && frame_queue_.empty()) {
break;
}
if (!frame_queue_.empty()) {
frame = frame_queue_.front();
frame_queue_.pop();
}
}
// Write outside queue_mutex_ so the producer is not blocked by disk I/O;
// writer_mutex_ serializes against the audio thread and stopRecording().
if (!frame.empty()) {
std::lock_guard<std::mutex> lock(writer_mutex_);
av_writer_.writeVideoFrame(frame);
}
}
LOGI("Writer thread finished");
}
// One-shot deferred initialization: when startRecording() was given 0x0,
// the AV writer is opened here with the first real frame's dimensions
// (called from writeFrame()). Later calls are ignored so the header
// geometry never changes mid-file.
void MP4Recorder::setFrameSize(int width, int height) {
if (frame_width_ != 0 && frame_height_ != 0) {
// Already set, don't change
return;
}
frame_width_ = width;
frame_height_ = height;
// Initialize AV writer with actual frame size
std::lock_guard<std::mutex> lock(writer_mutex_);
if (!av_writer_.open(output_path_, width, height, target_fps_, audio_enabled_)) {
LOGE("Failed to open AV writer with size %dx%d", width, height);
} else {
LOGI("AV writer initialized with actual frame size: %dx%d", width, height);
}
}
// Audio pump: drains the AudioRecorder's PCM queue into the AV writer
// until stopRecording() raises should_stop_. getAudioData() itself blocks
// up to ~100 ms, so the extra sleep only kicks in when it returns empty.
void MP4Recorder::audioThread() {
    LOGI("Audio thread started");
    std::vector<short> pcm;
    while (!should_stop_) {
        const bool got = audio_recorder_.getAudioData(pcm);
        if (!got) {
            // Nothing queued right now; back off briefly instead of spinning.
            std::this_thread::sleep_for(std::chrono::milliseconds(10));
            continue;
        }
        if (!pcm.empty()) {
            // writer_mutex_ serializes with the video writer thread.
            std::lock_guard<std::mutex> lock(writer_mutex_);
            av_writer_.writeAudioData(pcm);
        }
    }
    LOGI("Audio thread finished");
}
// Maintain current_fps_ from the arrival times of incoming frames, over a
// sliding window of the last max_recent_frames_ timestamps.
// Two phases: "fast detection" for the first 20 frames (frames 1-3 pinned
// to a conservative 1 fps, frames 4-20 use the raw window rate), then
// exponential smoothing with fps_smoothing_factor_ for steady state.
// Called only from writeFrame() (single producer); no locking here.
void MP4Recorder::updateFrameRate() {
auto current_time = std::chrono::steady_clock::now();
frame_count_++;
// Slide the timestamp window.
recent_frame_times_.push_back(current_time);
if (recent_frame_times_.size() > max_recent_frames_) {
recent_frame_times_.erase(recent_frame_times_.begin());
}
// Enhanced fast detection for initial frames
if (use_fast_detection_ && frame_count_ <= 20) {
// Special handling for first 3 frames to prevent initial acceleration
if (frame_count_ <= 3) {
// Conservative approach: assume worst-case scenario (1fps)
current_fps_ = 1.0f;
LOGI("Initial frame %d: Conservative FPS=1.0", frame_count_);
} else if (recent_frame_times_.size() >= 3) {
// Fast window-based detection for frames 4-20: raw rate over the
// window, no smoothing, for quick response.
auto time_span = std::chrono::duration_cast<std::chrono::milliseconds>(
recent_frame_times_.back() - recent_frame_times_.front()).count();
if (time_span > 0) {
float window_fps = (recent_frame_times_.size() - 1) * 1000.0f / time_span;
// Use direct window FPS for fast response
current_fps_ = window_fps;
LOGI("Fast detection frame %d: Window FPS=%.2f", frame_count_, current_fps_);
}
}
// Switch to smooth detection after 20 frames
if (frame_count_ >= 20) {
use_fast_detection_ = false;
LOGI("Switching to smooth FPS detection, current FPS: %.2f", current_fps_);
}
} else {
// Steady state: exponential moving average of the window rate.
if (recent_frame_times_.size() >= 5) {
auto time_span = std::chrono::duration_cast<std::chrono::milliseconds>(
recent_frame_times_.back() - recent_frame_times_.front()).count();
if (time_span > 0) {
float recent_fps = (recent_frame_times_.size() - 1) * 1000.0f / time_span;
// EMA: weight fps_smoothing_factor_ on the newest window estimate.
current_fps_ = current_fps_ * (1.0f - fps_smoothing_factor_) + recent_fps * fps_smoothing_factor_;
}
}
// Log FPS periodically
if (frame_count_ % 30 == 0) {
LOGI("Smooth FPS: %.2f, Target FPS: %d", current_fps_, target_fps_);
}
}
last_frame_time_ = current_time;
}
// Enqueue `frame` for the writer thread, then enqueue extra copies so the
// on-disk stream plays at target_fps_ even when the camera/inference loop
// runs slower. Duplication is tiered by session phase:
//   frames 1-3: pad up to target_fps_ (matches the pinned 1 fps estimate),
//   frames 4-10: ratio-based with a cap of 20 copies,
//   frames 11-20: ratio-based (cap 15) only below min_fps_threshold_,
//   steady state: ratio-based (cap 10) only below min_fps_threshold_.
// Queue caps (100 original / 150 with duplicates) bound memory use.
void MP4Recorder::writeFrameWithDuplication(const cv::Mat& frame) {
// Add original frame to queue
{
std::lock_guard<std::mutex> lock(queue_mutex_);
if (frame_queue_.size() < 100) {
frame_queue_.push(frame.clone());
queue_cv_.notify_one();
}
}
// Calculate duplication count based on current FPS and frame count
int duplication_count = 0;
if (use_fast_detection_) {
// Fast detection mode - more aggressive duplication to prevent initial acceleration
if (frame_count_ <= 3) {
// First 3 frames: maximum duplication to ensure smooth start
duplication_count = target_fps_ - 1;
LOGI("Initial frame %d: Duplicating %d times (conservative start)", frame_count_, duplication_count);
} else if (frame_count_ <= 10) {
// Frames 4-10: target/current ratio; 0.5f floor guards divide-by-~0.
duplication_count = static_cast<int>(target_fps_ / std::max(current_fps_, 0.5f)) - 1;
duplication_count = std::min(duplication_count, 20);
if (duplication_count > 0) {
LOGI("Fast adaptation frame %d: Duplicating %d times for FPS %.2f", frame_count_, duplication_count, current_fps_);
}
} else {
// Frames 11-20: normal duplication
if (current_fps_ < min_fps_threshold_) {
duplication_count = static_cast<int>(target_fps_ / std::max(current_fps_, 0.5f)) - 1;
duplication_count = std::min(duplication_count, 15);
if (duplication_count > 0) {
LOGI("Fast mode frame %d: Duplicating %d times for FPS %.2f", frame_count_, duplication_count, current_fps_);
}
}
}
} else {
// Smooth detection mode - normal duplication
if (current_fps_ < min_fps_threshold_) {
duplication_count = static_cast<int>(target_fps_ / std::max(current_fps_, 0.5f)) - 1;
duplication_count = std::min(duplication_count, 10);
if (duplication_count > 0 && frame_count_ % 30 == 0) {
LOGI("Smooth mode: Duplicating %d times for FPS %.2f", duplication_count, current_fps_);
}
}
}
// Enqueue the duplicates; lock is re-taken per copy so the writer thread
// can interleave pops between pushes.
for (int i = 0; i < duplication_count; i++) {
std::lock_guard<std::mutex> lock(queue_mutex_);
if (frame_queue_.size() < 150) { // Increased queue size for initial frames
frame_queue_.push(frame.clone());
queue_cv_.notify_one();
} else {
break; // Prevent memory overflow
}
}
}
void MP4Recorder::cleanup() {
    // Reset only the FPS-monitoring state so the next recording starts
    // fresh; thread/file teardown itself happens in stopRecording().
    frame_count_ = 0;
    has_last_frame_ = false;
    current_fps_ = target_fps_;
}
\ No newline at end of file
... ...
#ifndef MP4RECORDER_H
#define MP4RECORDER_H
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <queue>
#include <string>
#include <thread>
#include <vector>
#include <opencv2/opencv.hpp>
#include "av_writer.h"
#include "audio_recorder.h"
// Records annotated cv::Mat frames (and, optionally, microphone audio via
// AudioRecorder) to a video file. Frames are queued and drained by a writer
// thread; when the capture rate drops below the target, frames are
// duplicated so the output plays back at the correct speed.
class MP4Recorder {
public:
MP4Recorder();
~MP4Recorder();
// Start a new recording. NOTE(review): the JNI caller passes width/height
// of 0, apparently deferring sizing to the first frame -- confirm in the
// implementation.
bool startRecording(const std::string& outputPath, int width, int height, int fps = 30, bool enableAudio = true);
// Stop the active recording; returns whether shutdown succeeded.
bool stopRecording();
// NOTE(review): plain bool read; looks like it is polled from the camera
// callback thread without synchronization -- confirm intended.
bool isRecording() const { return recording_; }
// Queue one frame for encoding.
bool writeFrame(const cv::Mat& frame);
void setFrameSize(int width, int height);
// Audio recording control
bool isAudioEnabled() const { return audio_enabled_; }
// Frame rate monitoring
float getCurrentFPS() const { return current_fps_; }
private:
void writerThread();
void audioThread();
void cleanup();                 // resets FPS-monitoring state only
void updateFrameRate();
// Queues the frame plus extra copies when capture is slower than target_fps_.
void writeFrameWithDuplication(const cv::Mat& frame);
std::string output_path_;
bool recording_;
bool should_stop_;
bool audio_enabled_;
int frame_width_;
int frame_height_;
int target_fps_;                // playback FPS the output file is written at
float current_fps_;             // smoothed estimate of the incoming frame rate
// Frame rate monitoring
std::chrono::steady_clock::time_point last_frame_time_;
std::chrono::steady_clock::time_point recording_start_time_;
int frame_count_;
float fps_smoothing_factor_;    // smoothing weight applied in updateFrameRate()
// Fast FPS detection for initial frames
std::vector<std::chrono::steady_clock::time_point> recent_frame_times_;
int max_recent_frames_;
bool use_fast_detection_;       // aggressive duplication during the first frames
// Frame duplication for low FPS
cv::Mat last_frame_;
bool has_last_frame_;
int min_fps_threshold_;         // duplicate frames when current_fps_ falls below this
std::queue<cv::Mat> frame_queue_;   // frames waiting for the writer thread
std::mutex queue_mutex_;
std::condition_variable queue_cv_;
std::thread writer_thread_;
// Audio recording
AudioRecorder audio_recorder_;
std::thread audio_thread_;
std::mutex audio_mutex_;
AVWriter av_writer_;
std::mutex writer_mutex_;
};
\ No newline at end of file
... ...
#include "simple_mp4_writer.h"
#include <android/log.h>
#define LOG_TAG "SimpleMP4Writer"
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
SimpleMP4Writer::SimpleMP4Writer() {
    // Start from a closed, empty state; real setup happens in open().
    is_open_ = false;
    frame_count_ = 0;
    width_ = 0;
    height_ = 0;
    fps_ = 30;
}
SimpleMP4Writer::~SimpleMP4Writer() {
// Finalize the header and release the file handle if still open.
close();
}
bool SimpleMP4Writer::open(const std::string& filename, int width, int height, int fps) {
    // Open a new output file and write the container header.
    // Despite the class name, the container produced is AVI (the extension
    // is rewritten to .avi) -- it needs no external encoder libraries.
    if (is_open_) {
        close();
    }
    // Validate up front: writeHeader() divides by fps and derives buffer
    // sizes from width*height, so non-positive values would be undefined
    // behavior (division by zero) or a corrupt header.
    if (width <= 0 || height <= 0 || fps <= 0) {
        LOGE("Invalid video parameters: %dx%d @ %dfps", width, height, fps);
        return false;
    }
    filename_ = filename;
    width_ = width;
    height_ = height;
    fps_ = fps;
    frame_count_ = 0;
    // Swap the extension for .avi; a name with no '.' is used unchanged.
    std::string avi_filename = filename;
    size_t pos = avi_filename.find_last_of('.');
    if (pos != std::string::npos) {
        avi_filename = avi_filename.substr(0, pos) + ".avi";
    }
    file_.open(avi_filename, std::ios::binary);
    if (!file_.is_open()) {
        LOGE("Failed to open file: %s", avi_filename.c_str());
        return false;
    }
    writeHeader();
    is_open_ = true;
    LOGI("Opened video file: %s (%dx%d @ %dfps)", avi_filename.c_str(), width, height, fps);
    return true;
}
bool SimpleMP4Writer::write(const cv::Mat& frame) {
    // Append one frame: convert to 3-channel BGR as needed, resize to the
    // dimensions fixed at open(), and emit a DIB chunk. Returns false when
    // the writer is closed or the frame cannot be converted.
    if (!is_open_) {
        return false;
    }
    if (frame.empty()) {
        LOGE("Empty frame");
        return false;
    }
    cv::Mat bgr_frame;
    switch (frame.channels()) {
        case 4:
            cv::cvtColor(frame, bgr_frame, cv::COLOR_RGBA2BGR);
            break;
        case 3:
            bgr_frame = frame;
            break;
        case 1:
            // Generalization: also accept single-channel grayscale input.
            cv::cvtColor(frame, bgr_frame, cv::COLOR_GRAY2BGR);
            break;
        default:
            LOGE("Unsupported frame format");
            return false;
    }
    // Resize to the header dimensions if necessary
    if (bgr_frame.cols != width_ || bgr_frame.rows != height_) {
        cv::resize(bgr_frame, bgr_frame, cv::Size(width_, height_));
    }
    writeFrame(bgr_frame);
    frame_count_++;
    return true;
}
void SimpleMP4Writer::close() {
    // No-op when nothing is open; otherwise patch the header and close.
    if (!is_open_)
        return;
    finalize();       // fill in the size/frame-count placeholders
    file_.close();
    is_open_ = false;
    LOGI("Closed video file: %s (%d frames)", filename_.c_str(), frame_count_);
}
void SimpleMP4Writer::writeHeader() {
// Write a simple AVI header
// This is a minimal implementation for demonstration
// RIFF header
file_.write("RIFF", 4);
uint32_t file_size = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&file_size), 4);
file_.write("AVI ", 4);
// LIST hdrl
file_.write("LIST", 4);
uint32_t hdrl_size = 208; // Approximate size
file_.write(reinterpret_cast<const char*>(&hdrl_size), 4);
file_.write("hdrl", 4);
// avih chunk
file_.write("avih", 4);
uint32_t avih_size = 56;
file_.write(reinterpret_cast<const char*>(&avih_size), 4);
// AVI main header
uint32_t microsec_per_frame = 1000000 / fps_;
file_.write(reinterpret_cast<const char*>(&microsec_per_frame), 4);
uint32_t max_bytes_per_sec = width_ * height_ * 3 * fps_;
file_.write(reinterpret_cast<const char*>(&max_bytes_per_sec), 4);
uint32_t padding = 0;
file_.write(reinterpret_cast<const char*>(&padding), 4);
uint32_t flags = 0x10; // AVIF_HASINDEX
file_.write(reinterpret_cast<const char*>(&flags), 4);
uint32_t total_frames = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&total_frames), 4);
uint32_t initial_frames = 0;
file_.write(reinterpret_cast<const char*>(&initial_frames), 4);
uint32_t streams = 1;
file_.write(reinterpret_cast<const char*>(&streams), 4);
uint32_t suggested_buffer_size = width_ * height_ * 3;
file_.write(reinterpret_cast<const char*>(&suggested_buffer_size), 4);
uint32_t width = width_;
file_.write(reinterpret_cast<const char*>(&width), 4);
uint32_t height = height_;
file_.write(reinterpret_cast<const char*>(&height), 4);
uint32_t reserved[4] = {0, 0, 0, 0};
file_.write(reinterpret_cast<const char*>(reserved), 16);
// LIST strl
file_.write("LIST", 4);
uint32_t strl_size = 140;
file_.write(reinterpret_cast<const char*>(&strl_size), 4);
file_.write("strl", 4);
// strh chunk
file_.write("strh", 4);
uint32_t strh_size = 56;
file_.write(reinterpret_cast<const char*>(&strh_size), 4);
file_.write("vids", 4); // Stream type
file_.write("DIB ", 4); // Handler
uint32_t strh_flags = 0;
file_.write(reinterpret_cast<const char*>(&strh_flags), 4);
uint16_t priority = 0;
file_.write(reinterpret_cast<const char*>(&priority), 2);
uint16_t language = 0;
file_.write(reinterpret_cast<const char*>(&language), 2);
uint32_t initial_frames_strh = 0;
file_.write(reinterpret_cast<const char*>(&initial_frames_strh), 4);
uint32_t scale = 1;
file_.write(reinterpret_cast<const char*>(&scale), 4);
uint32_t rate = fps_;
file_.write(reinterpret_cast<const char*>(&rate), 4);
uint32_t start = 0;
file_.write(reinterpret_cast<const char*>(&start), 4);
uint32_t length = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&length), 4);
uint32_t suggested_buffer_size_strh = width_ * height_ * 3;
file_.write(reinterpret_cast<const char*>(&suggested_buffer_size_strh), 4);
uint32_t quality = 0;
file_.write(reinterpret_cast<const char*>(&quality), 4);
uint32_t sample_size = 0;
file_.write(reinterpret_cast<const char*>(&sample_size), 4);
uint16_t left = 0, top = 0, right = width_, bottom = height_;
file_.write(reinterpret_cast<const char*>(&left), 2);
file_.write(reinterpret_cast<const char*>(&top), 2);
file_.write(reinterpret_cast<const char*>(&right), 2);
file_.write(reinterpret_cast<const char*>(&bottom), 2);
// strf chunk (BITMAPINFOHEADER)
file_.write("strf", 4);
uint32_t strf_size = 40;
file_.write(reinterpret_cast<const char*>(&strf_size), 4);
uint32_t bi_size = 40;
file_.write(reinterpret_cast<const char*>(&bi_size), 4);
int32_t bi_width = width_;
file_.write(reinterpret_cast<const char*>(&bi_width), 4);
int32_t bi_height = height_;
file_.write(reinterpret_cast<const char*>(&bi_height), 4);
uint16_t bi_planes = 1;
file_.write(reinterpret_cast<const char*>(&bi_planes), 2);
uint16_t bi_bit_count = 24;
file_.write(reinterpret_cast<const char*>(&bi_bit_count), 2);
uint32_t bi_compression = 0; // BI_RGB
file_.write(reinterpret_cast<const char*>(&bi_compression), 4);
uint32_t bi_size_image = width_ * height_ * 3;
file_.write(reinterpret_cast<const char*>(&bi_size_image), 4);
int32_t bi_x_pels_per_meter = 0;
file_.write(reinterpret_cast<const char*>(&bi_x_pels_per_meter), 4);
int32_t bi_y_pels_per_meter = 0;
file_.write(reinterpret_cast<const char*>(&bi_y_pels_per_meter), 4);
uint32_t bi_clr_used = 0;
file_.write(reinterpret_cast<const char*>(&bi_clr_used), 4);
uint32_t bi_clr_important = 0;
file_.write(reinterpret_cast<const char*>(&bi_clr_important), 4);
// LIST movi
file_.write("LIST", 4);
uint32_t movi_size = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&movi_size), 4);
file_.write("movi", 4);
}
void SimpleMP4Writer::writeFrame(const cv::Mat& frame) {
    // Emit one '00db' (uncompressed DIB) chunk. DIB rows must be padded to
    // a 4-byte (DWORD) boundary; the previous code skipped this, shearing
    // every frame whose row size (cols*3) is not a multiple of 4.
    const int row_bytes = frame.cols * 3;
    const int padded_row_bytes = (row_bytes + 3) & ~3;
    const uint32_t chunk_size =
        static_cast<uint32_t>(padded_row_bytes) * static_cast<uint32_t>(frame.rows);
    file_.write("00db", 4);                 // chunk ID
    file_.write(reinterpret_cast<const char*>(&chunk_size), 4);
    // DIB data is stored bottom-up. Pixel bytes are written exactly as
    // handed in by write() (BGR order, matching 24-bit BI_RGB layout).
    const char row_padding[3] = {0, 0, 0};
    for (int y = frame.rows - 1; y >= 0; y--) {
        file_.write(reinterpret_cast<const char*>(frame.ptr(y)), row_bytes);
        if (padded_row_bytes > row_bytes) {
            file_.write(row_padding, padded_row_bytes - row_bytes);
        }
    }
    // padded_row_bytes is a multiple of 4, so chunk_size is always even --
    // no trailing RIFF alignment byte is needed.
}
void SimpleMP4Writer::finalize() {
// Patch the placeholder fields written by writeHeader(). The offsets below
// are physical byte positions in the fixed-layout header:
//   4   RIFF size      (all bytes after the 8-byte "RIFF"+size prefix)
//   48  avih dwTotalFrames (avih data starts at 32; 5th dword field)
//   140 strh dwLength in frames (strh data starts at 108; +32 bytes)
//   216 LIST 'movi' size (counts the "movi" fourcc at 220 through EOF)
// Any change to writeHeader()'s field order invalidates these offsets.
std::streampos current_pos = file_.tellp();
uint32_t file_size = static_cast<uint32_t>(current_pos) - 8;
// Update RIFF size
file_.seekp(4);
file_.write(reinterpret_cast<const char*>(&file_size), 4);
// Update total frames in avih
file_.seekp(48);
uint32_t total_frames = frame_count_;
file_.write(reinterpret_cast<const char*>(&total_frames), 4);
// Update length in strh
file_.seekp(140);
file_.write(reinterpret_cast<const char*>(&total_frames), 4);
// Update movi size ("movi" fourcc plus all frame chunks = pos - 220)
uint32_t movi_size = static_cast<uint32_t>(current_pos) - 220;
file_.seekp(216);
file_.write(reinterpret_cast<const char*>(&movi_size), 4);
file_.seekp(current_pos);
}
\ No newline at end of file
... ...
#ifndef SIMPLE_MP4_WRITER_H
#define SIMPLE_MP4_WRITER_H
#include <string>
#include <vector>
#include <fstream>
#include <opencv2/opencv.hpp>
// Writes frames to disk as an uncompressed AVI file (despite the "MP4"
// name -- open() rewrites the output extension to .avi). Requires no
// external codec libraries.
class SimpleMP4Writer {
public:
SimpleMP4Writer();
~SimpleMP4Writer();
// Open the output file and write the AVI header. Returns false on failure.
bool open(const std::string& filename, int width, int height, int fps);
// Append one frame (converted/resized as needed). Returns false when the
// writer is closed or the frame format is unsupported.
bool write(const cv::Mat& frame);
// Patch the header placeholders and close the file; no-op when not open.
void close();
private:
void writeHeader();                      // RIFF/AVI header with placeholder sizes
void writeFrame(const cv::Mat& frame);   // one '00db' DIB chunk
void finalize();                         // patch sizes/frame counts in place
std::ofstream file_;
std::string filename_;
int width_;
int height_;
int fps_;
int frame_count_;
bool is_open_;
// NOTE(review): appears unused by the visible implementation -- confirm.
std::vector<uint8_t> frame_buffer_;
};
#endif // SIMPLE_MP4_WRITER_H
\ No newline at end of file
... ...
// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.
#include <android/asset_manager_jni.h>
#include <android/native_window_jni.h>
#include <android/native_window.h>
#include <android/log.h>
#include <jni.h>
#include <string>
#include <vector>
#include <platform.h>
#include <benchmark.h>
#include "yolov8.h"
#include "ndkcamera.h"
#include "mp4recorder.h"
#include <opencv2/core/core.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#if __ARM_NEON
#include <arm_neon.h>
#endif // __ARM_NEON
static int draw_unsupported(cv::Mat& rgb)
{
    // Paint a centered "unsupported" banner (white box, black text) on the
    // frame, used when no model is loaded.
    static const char banner[] = "unsupported";
    int baseline = 0;
    const cv::Size text_size = cv::getTextSize(banner, cv::FONT_HERSHEY_SIMPLEX, 1.0, 1, &baseline);
    const int x = (rgb.cols - text_size.width) / 2;
    const int y = (rgb.rows - text_size.height) / 2;
    cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(text_size.width, text_size.height + baseline)),
                  cv::Scalar(255, 255, 255), -1);
    cv::putText(rgb, banner, cv::Point(x, y + text_size.height),
                cv::FONT_HERSHEY_SIMPLEX, 1.0, cv::Scalar(0, 0, 0));
    return 0;
}
static int draw_fps(cv::Mat& rgb)
{
    // Overlay a moving-average FPS counter (last 10 intervals) at the
    // top-right corner. Returns 0 without drawing until the history fills.
    float avg_fps = 0.f;
    {
        static double t0 = 0.f;
        static float fps_history[10] = {0.f};
        double t1 = ncnn::get_current_time();
        if (t0 == 0.f)
        {
            // First call: no previous timestamp to measure against.
            t0 = t1;
            return 0;
        }
        float fps = 1000.f / (t1 - t0);
        t0 = t1;
        // Shift the history and insert the newest sample at the front.
        for (int i = 9; i >= 1; i--)
        {
            fps_history[i] = fps_history[i - 1];
        }
        fps_history[0] = fps;
        if (fps_history[9] == 0.f)
        {
            // History not yet full; wait before showing an average.
            return 0;
        }
        for (int i = 0; i < 10; i++)
        {
            avg_fps += fps_history[i];
        }
        avg_fps /= 10.f;
    }
    char text[32];
    // snprintf instead of sprintf: bounded write into the stack buffer.
    snprintf(text, sizeof(text), "FPS=%.2f", avg_fps);
    int baseLine = 0;
    cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    int y = 0;
    int x = rgb.cols - label_size.width;
    cv::rectangle(rgb, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
                  cv::Scalar(255, 255, 255), -1);
    cv::putText(rgb, text, cv::Point(x, y + label_size.height),
                cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
    return 0;
}
// Detector instance and the mutex guarding it (shared between JNI calls and
// the camera render callback).
static YOLOv8* g_yolov8 = 0;
static ncnn::Mutex lock;
// Recorder instance and its guard, same sharing pattern.
static MP4Recorder* g_recorder = 0;
static ncnn::Mutex recorder_lock;
// Camera subclass whose per-frame render hook runs detection, drawing and
// (optionally) recording on every captured frame.
class MyNdkCamera : public NdkCameraWindow
{
public:
virtual void on_image_render(cv::Mat& rgb) const;
};
void MyNdkCamera::on_image_render(cv::Mat& rgb) const
{
// Per-frame pipeline: detect + draw under the model lock, overlay the FPS
// counter, then hand the annotated frame to the recorder if one is active.
// yolov8
{
ncnn::MutexLockGuard g(lock);
if (g_yolov8)
{
std::vector<Object> objects;
g_yolov8->detect(rgb, objects);
g_yolov8->draw(rgb, objects);
}
else
{
// No model loaded yet -- show a placeholder banner instead.
draw_unsupported(rgb);
}
}
draw_fps(rgb);
// Record frame if recording is active
{
ncnn::MutexLockGuard g(recorder_lock);
if (g_recorder && g_recorder->isRecording()) {
g_recorder->writeFrame(rgb);
}
}
}
// Camera singleton, created in JNI_OnLoad and destroyed in JNI_OnUnload.
static MyNdkCamera* g_camera = 0;
extern "C" {
JNIEXPORT jint JNI_OnLoad(JavaVM* vm, void* reserved)
{
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnLoad");
// Create the long-lived singletons up front; they are torn down in
// JNI_OnUnload. The GPU instance may later be destroyed and recreated by
// loadModel() when the user switches compute backends.
g_camera = new MyNdkCamera;
g_recorder = new MP4Recorder;
ncnn::create_gpu_instance();
return JNI_VERSION_1_4;
}
JNIEXPORT void JNI_OnUnload(JavaVM* vm, void* reserved)
{
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "JNI_OnUnload");
// Destroy the detector and recorder under their respective locks so an
// in-flight render callback cannot race the deletes.
{
ncnn::MutexLockGuard g(lock);
delete g_yolov8;
g_yolov8 = 0;
}
{
ncnn::MutexLockGuard g(recorder_lock);
delete g_recorder;
g_recorder = 0;
}
ncnn::destroy_gpu_instance();
// NOTE(review): the camera is deleted last; if it is still streaming at
// unload, its callback could run between the teardown above and this
// delete -- confirm callers close the camera before the library unloads.
delete g_camera;
g_camera = 0;
}
// public native boolean loadModel(AssetManager mgr, int taskid, int modelid, int cpugpu);
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_loadModel(JNIEnv* env, jobject thiz, jobject assetManager, jint taskid, jint modelid, jint cpugpu)
{
// Load (or reload) the YOLOv8 model selected by task/model/backend ids.
// taskid 0..5 selects the task variant (see tasknames); modelid 0..8 encodes
// a size letter (n/s/m, repeating every 3) plus a target-size tier
// (modelid/3); cpugpu: 0 = CPU, 1 = GPU, 2 = GPU via the freedreno driver.
if (taskid < 0 || taskid > 5 || modelid < 0 || modelid > 8 || cpugpu < 0 || cpugpu > 2)
{
return JNI_FALSE;
}
AAssetManager* mgr = AAssetManager_fromJava(env, assetManager);
__android_log_print(ANDROID_LOG_DEBUG, "ncnn", "loadModel %p", mgr);
// Asset-name suffix for each task variant.
const char* tasknames[6] =
{
"",
"_oiv7",
"_seg",
"_pose",
"_cls",
"_obb"
};
// Model size letter; repeats every 3 because modelid also encodes the
// target-size tier handled below.
const char* modeltypes[9] =
{
"n",
"s",
"m",
"n",
"s",
"m",
"n",
"s",
"m"
};
std::string parampath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.param";
std::string modelpath = std::string("yolov8") + modeltypes[(int)modelid] + tasknames[(int)taskid] + ".ncnn.bin";
bool use_gpu = (int)cpugpu == 1;
bool use_turnip = (int)cpugpu == 2;
// reload
{
ncnn::MutexLockGuard g(lock);
{
// Remember the last-loaded configuration; only rebuild the detector
// when the task, size letter, or backend actually changed.
static int old_taskid = 0;
static int old_modelid = 0;
static int old_cpugpu = 0;
if (taskid != old_taskid || (modelid % 3) != old_modelid || cpugpu != old_cpugpu)
{
// taskid or model or cpugpu changed
delete g_yolov8;
g_yolov8 = 0;
}
old_taskid = taskid;
old_modelid = modelid % 3;
old_cpugpu = cpugpu;
// Recreate the Vulkan instance so the requested driver takes effect.
// NOTE(review): this runs even when nothing changed -- confirm that
// destroying/recreating the GPU instance on every call is intended.
ncnn::destroy_gpu_instance();
if (use_turnip)
{
ncnn::create_gpu_instance("libvulkan_freedreno.so");
}
else if (use_gpu)
{
ncnn::create_gpu_instance();
}
if (!g_yolov8)
{
// Instantiate the task-specific detector and load its weights.
if (taskid == 0) g_yolov8 = new YOLOv8_det_coco;
if (taskid == 1) g_yolov8 = new YOLOv8_det_oiv7;
if (taskid == 2) g_yolov8 = new YOLOv8_seg;
if (taskid == 3) g_yolov8 = new YOLOv8_pose;
if (taskid == 4) g_yolov8 = new YOLOv8_cls;
if (taskid == 5) g_yolov8 = new YOLOv8_obb;
g_yolov8->load(mgr, parampath.c_str(), modelpath.c_str(), use_gpu || use_turnip);
}
// Target input size tier: 0-2 -> 320, 3-5 -> 480, 6-8 -> 640.
int target_size = 320;
if ((int)modelid >= 3)
target_size = 480;
if ((int)modelid >= 6)
target_size = 640;
g_yolov8->set_det_target_size(target_size);
}
}
return JNI_TRUE;
}
// public native boolean openCamera(int facing);
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_openCamera(JNIEnv* env, jobject thiz, jint facing)
{
    // Reject anything other than the two valid facing ids (0 or 1).
    const int facing_id = (int)facing;
    if (facing_id != 0 && facing_id != 1)
        return JNI_FALSE;
    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "openCamera %d", facing_id);
    g_camera->open(facing_id);
    return JNI_TRUE;
}
// public native boolean closeCamera();
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_closeCamera(JNIEnv* env, jobject thiz)
{
    // Stop the NDK camera capture session.
    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "closeCamera");
    g_camera->close();
    return JNI_TRUE;
}
// public native boolean setOutputWindow(Surface surface);
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_setOutputWindow(JNIEnv* env, jobject thiz, jobject surface)
{
    // Hand the Java Surface's native window to the camera as preview output.
    ANativeWindow* native_window = ANativeWindow_fromSurface(env, surface);
    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "setOutputWindow %p", native_window);
    g_camera->set_window(native_window);
    return JNI_TRUE;
}
// public native boolean startRecording(String outputPath);
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_startRecording(JNIEnv* env, jobject thiz, jstring outputPath)
{
    // Begin recording annotated frames (with audio) to the given file path.
    if (!g_recorder) {
        __android_log_print(ANDROID_LOG_ERROR, "ncnn", "Recorder not initialized");
        return JNI_FALSE;
    }
    const char* path = env->GetStringUTFChars(outputPath, nullptr);
    if (!path) {
        __android_log_print(ANDROID_LOG_ERROR, "ncnn", "Failed to get output path");
        return JNI_FALSE;
    }
    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "startRecording %s", path);
    std::string output(path);
    env->ReleaseStringUTFChars(outputPath, path);
    bool started;
    {
        ncnn::MutexLockGuard g(recorder_lock);
        // Width/height of 0: the recorder sizes itself from the first frame.
        // Audio is enabled by default.
        started = g_recorder->startRecording(output, 0, 0, 30, true);
    }
    return started ? JNI_TRUE : JNI_FALSE;
}
// public native boolean stopRecording();
JNIEXPORT jboolean JNICALL Java_com_tencent_yolov8ncnn_YOLOv8Ncnn_stopRecording(JNIEnv* env, jobject thiz)
{
    // Finish the active recording, if any, under the recorder lock.
    if (!g_recorder) {
        __android_log_print(ANDROID_LOG_ERROR, "ncnn", "Recorder not initialized");
        return JNI_FALSE;
    }
    __android_log_print(ANDROID_LOG_DEBUG, "ncnn", "stopRecording");
    bool stopped;
    {
        ncnn::MutexLockGuard g(recorder_lock);
        stopped = g_recorder->stopRecording();
    }
    return stopped ? JNI_TRUE : JNI_FALSE;
}
}
... ...