// av_writer.cpp
#include "av_writer.h"
#include <android/log.h>
#include <cstring>
// Tag attached to every message this file sends to the Android log.
#define LOG_TAG "AVWriter"
// printf-style logging helpers forwarding to __android_log_print at INFO and
// ERROR priority respectively.
#define LOGI(...) __android_log_print(ANDROID_LOG_INFO, LOG_TAG, __VA_ARGS__)
#define LOGE(...) __android_log_print(ANDROID_LOG_ERROR, LOG_TAG, __VA_ARGS__)
// AVI / RIFF four-character codes.
//
// FOURCC packs four characters into one 32-bit value with the first character
// in the least significant byte, so that storing the value on a little-endian
// host yields the characters in reading order.
#define FOURCC(a,b,c,d)                      \
    ( static_cast<uint32_t>(a)               \
    | (static_cast<uint32_t>(b) << 8)        \
    | (static_cast<uint32_t>(c) << 16)       \
    | (static_cast<uint32_t>(d) << 24) )

// Container-level identifiers.
static constexpr uint32_t RIFF_FOURCC = FOURCC('R','I','F','F');
static constexpr uint32_t AVI_FOURCC  = FOURCC('A','V','I',' ');
static constexpr uint32_t LIST_FOURCC = FOURCC('L','I','S','T');

// Header list and its sub-chunks.
static constexpr uint32_t HDRL_FOURCC = FOURCC('h','d','r','l');
static constexpr uint32_t AVIH_FOURCC = FOURCC('a','v','i','h');
static constexpr uint32_t STRL_FOURCC = FOURCC('s','t','r','l');
static constexpr uint32_t STRH_FOURCC = FOURCC('s','t','r','h');
static constexpr uint32_t STRF_FOURCC = FOURCC('s','t','r','f');

// Data list.
static constexpr uint32_t MOVI_FOURCC = FOURCC('m','o','v','i');

// Stream types and handlers.
static constexpr uint32_t VIDS_FOURCC = FOURCC('v','i','d','s');
static constexpr uint32_t AUDS_FOURCC = FOURCC('a','u','d','s');
static constexpr uint32_t DIB_FOURCC  = FOURCC('D','I','B',' ');
static constexpr uint32_t PCM_FOURCC  = FOURCC('P','C','M',' ');

// Per-chunk identifiers inside 'movi': stream 00 video, stream 01 audio.
static constexpr uint32_t DC00_FOURCC = FOURCC('0','0','d','c');
static constexpr uint32_t WB01_FOURCC = FOURCC('0','1','w','b');
// Constructs a writer in the closed state with default settings: 30 fps video,
// audio disabled, 44100 Hz single-channel audio parameters, and all frame,
// sample and byte counters at zero. No file is touched until open() is called.
// The stream-position members used for header back-patching are not listed
// here; they are assigned in writeAVIHeader().
AVWriter::AVWriter()
: is_open_(false)
, width_(0)
, height_(0)
, fps_(30)
, video_frame_count_(0)
, audio_enabled_(false)
, sample_rate_(44100)
, channels_(1)
, audio_sample_count_(0)
, total_video_size_(0)
, total_audio_size_(0)
{
}
// Destructor: calls close(), which finalizes the headers and closes the file
// if the writer is still open and does nothing otherwise.
AVWriter::~AVWriter() {
close();
}
// Opens a new AVI file for writing and emits the container headers.
//
// Any file that is currently open is closed (and finalized) first.
//
// @param filename     Requested output path. If the last path component has an
//                     extension it is replaced with ".avi"; a name without an
//                     extension is used unchanged.
// @param width        Frame width in pixels; must be > 0.
// @param height       Frame height in pixels; must be > 0.
// @param fps          Frames per second; must be > 0 (it is used as a divisor
//                     when the header is written).
// @param enableAudio  Whether an audio stream header is written.
// @param sampleRate   Audio sample rate in Hz; must be > 0 when audio is enabled.
// @param channels     Audio channel count; must be > 0 when audio is enabled.
// @return true when the file was created and the header was written, false on
//         invalid parameters or I/O failure.
bool AVWriter::open(const std::string& filename, int width, int height, int fps,
                    bool enableAudio, int sampleRate, int channels) {
    if (is_open_) {
        close();
    }

    // Reject parameters that would produce a meaningless header (or a division
    // by zero when the per-frame duration is computed).
    if (width <= 0 || height <= 0 || fps <= 0) {
        LOGE("Invalid video parameters: %dx%d @ %dfps", width, height, fps);
        return false;
    }
    if (enableAudio && (sampleRate <= 0 || channels <= 0)) {
        LOGE("Invalid audio parameters: %d Hz, %d channels", sampleRate, channels);
        return false;
    }

    filename_ = filename;
    width_ = width;
    height_ = height;
    fps_ = fps;
    audio_enabled_ = enableAudio;
    sample_rate_ = sampleRate;
    channels_ = channels;
    video_frame_count_ = 0;
    audio_sample_count_ = 0;
    total_video_size_ = 0;
    total_audio_size_ = 0;

    // Change extension to .avi. Only a dot inside the last path component is
    // an extension separator; a dot in a directory name (for example
    // ".../com.example.app/clip") must not truncate the path.
    std::string avi_filename = filename;
    const size_t last_sep = avi_filename.find_last_of("/\\");
    const size_t last_dot = avi_filename.find_last_of('.');
    const bool has_extension =
        last_dot != std::string::npos &&
        (last_sep == std::string::npos || last_dot > last_sep);
    if (has_extension) {
        avi_filename = avi_filename.substr(0, last_dot) + ".avi";
    }

    file_.open(avi_filename, std::ios::binary);
    if (!file_.is_open()) {
        LOGE("Failed to open file: %s", avi_filename.c_str());
        return false;
    }

    writeAVIHeader();
    if (file_.fail()) {
        // The header could not be written (disk full, permission change, ...);
        // do not report the writer as usable.
        LOGE("Failed to write AVI header: %s", avi_filename.c_str());
        file_.close();
        return false;
    }

    is_open_ = true;
    LOGI("Opened AV file: %s (%dx%d @ %dfps) Audio: %s",
         avi_filename.c_str(), width, height, fps,
         audio_enabled_ ? "ON" : "OFF");
    return true;
}
// Appends one video frame to the open file.
//
// @param frame  8-bit image with 3 or 4 channels.
// @return true when the frame was written and counted; false when the writer
//         is closed, the frame is empty or has an unsupported layout, or the
//         output stream is in a failed state after the write.
bool AVWriter::writeVideoFrame(const cv::Mat& frame) {
    if (!is_open_ || frame.empty()) {
        return false;
    }

    // writeVideoFrameInternal() returns void and drops frames it cannot
    // handle. Reject them here so video_frame_count_ (later stored in the
    // header) only counts frames that are really in the file.
    const int channel_count = frame.channels();
    if ((channel_count != 3 && channel_count != 4) || frame.depth() != CV_8U) {
        LOGE("Unsupported frame format: %d channels, depth %d",
             channel_count, frame.depth());
        return false;
    }

    writeVideoFrameInternal(frame);
    if (file_.fail()) {
        LOGE("Failed to write video frame");
        return false;
    }

    video_frame_count_++;
    return true;
}
// Appends a block of 16-bit audio samples to the open file.
//
// Returns false without writing when the writer is closed, audio was not
// enabled in open(), or the buffer is empty. Otherwise the samples are written
// as one audio chunk, the running sample counter grows by the number of
// elements in audioData, and true is returned.
bool AVWriter::writeAudioData(const std::vector<short>& audioData) {
    const bool audio_stream_ready = is_open_ && audio_enabled_;
    if (!audio_stream_ready) {
        return false;
    }
    if (audioData.empty()) {
        return false;
    }

    writeAudioChunk(audioData);
    audio_sample_count_ += audioData.size();
    return true;
}
void AVWriter::writeAVIHeader() {
// Calculate frame size
int frame_size = width_ * height_ * 3; // RGB24
int microsec_per_frame = 1000000 / fps_;
// RIFF header
file_.write("RIFF", 4);
file_size_pos_ = file_.tellp();
uint32_t file_size = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&file_size), 4);
file_.write("AVI ", 4);
// LIST hdrl
file_.write("LIST", 4);
uint32_t hdrl_size = audio_enabled_ ? 308 : 244; // Approximate size
file_.write(reinterpret_cast<const char*>(&hdrl_size), 4);
file_.write("hdrl", 4);
// avih (main AVI header)
file_.write("avih", 4);
uint32_t avih_size = 56;
file_.write(reinterpret_cast<const char*>(&avih_size), 4);
uint32_t microsec_per_frame_val = microsec_per_frame;
file_.write(reinterpret_cast<const char*>(µsec_per_frame_val), 4);
uint32_t max_bytes_per_sec = frame_size * fps_;
file_.write(reinterpret_cast<const char*>(&max_bytes_per_sec), 4);
uint32_t padding_granularity = 0;
file_.write(reinterpret_cast<const char*>(&padding_granularity), 4);
uint32_t flags = 0x10; // AVIF_HASINDEX
file_.write(reinterpret_cast<const char*>(&flags), 4);
video_frames_pos_ = file_.tellp();
uint32_t total_frames = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&total_frames), 4);
uint32_t initial_frames = 0;
file_.write(reinterpret_cast<const char*>(&initial_frames), 4);
uint32_t streams = audio_enabled_ ? 2 : 1;
file_.write(reinterpret_cast<const char*>(&streams), 4);
uint32_t suggested_buffer_size = frame_size;
file_.write(reinterpret_cast<const char*>(&suggested_buffer_size), 4);
uint32_t width = width_;
file_.write(reinterpret_cast<const char*>(&width), 4);
uint32_t height = height_;
file_.write(reinterpret_cast<const char*>(&height), 4);
uint32_t reserved[4] = {0, 0, 0, 0};
file_.write(reinterpret_cast<const char*>(reserved), 16);
// Video stream header
file_.write("LIST", 4);
uint32_t strl_size = 116;
file_.write(reinterpret_cast<const char*>(&strl_size), 4);
file_.write("strl", 4);
// strh (stream header)
file_.write("strh", 4);
uint32_t strh_size = 56;
file_.write(reinterpret_cast<const char*>(&strh_size), 4);
file_.write("vids", 4); // fccType
file_.write("DIB ", 4); // fccHandler
uint32_t stream_flags = 0;
file_.write(reinterpret_cast<const char*>(&stream_flags), 4);
uint16_t priority = 0;
file_.write(reinterpret_cast<const char*>(&priority), 2);
uint16_t language = 0;
file_.write(reinterpret_cast<const char*>(&language), 2);
uint32_t initial_frames_stream = 0;
file_.write(reinterpret_cast<const char*>(&initial_frames_stream), 4);
uint32_t scale = 1;
file_.write(reinterpret_cast<const char*>(&scale), 4);
uint32_t rate = fps_;
file_.write(reinterpret_cast<const char*>(&rate), 4);
uint32_t start = 0;
file_.write(reinterpret_cast<const char*>(&start), 4);
video_length_pos_ = file_.tellp(); // Save position to update later
uint32_t length = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&length), 4);
uint32_t suggested_buffer_size_stream = frame_size;
file_.write(reinterpret_cast<const char*>(&suggested_buffer_size_stream), 4);
uint32_t quality = 0;
file_.write(reinterpret_cast<const char*>(&quality), 4);
uint32_t sample_size = 0;
file_.write(reinterpret_cast<const char*>(&sample_size), 4);
uint16_t left = 0, top = 0, right = width_, bottom = height_;
file_.write(reinterpret_cast<const char*>(&left), 2);
file_.write(reinterpret_cast<const char*>(&top), 2);
file_.write(reinterpret_cast<const char*>(&right), 2);
file_.write(reinterpret_cast<const char*>(&bottom), 2);
// strf (stream format)
file_.write("strf", 4);
uint32_t strf_size = 40;
file_.write(reinterpret_cast<const char*>(&strf_size), 4);
// BITMAPINFOHEADER
uint32_t bi_size = 40;
file_.write(reinterpret_cast<const char*>(&bi_size), 4);
int32_t bi_width = width_;
file_.write(reinterpret_cast<const char*>(&bi_width), 4);
int32_t bi_height = height_;
file_.write(reinterpret_cast<const char*>(&bi_height), 4);
uint16_t bi_planes = 1;
file_.write(reinterpret_cast<const char*>(&bi_planes), 2);
uint16_t bi_bit_count = 24;
file_.write(reinterpret_cast<const char*>(&bi_bit_count), 2);
uint32_t bi_compression = 0; // BI_RGB
file_.write(reinterpret_cast<const char*>(&bi_compression), 4);
uint32_t bi_size_image = frame_size;
file_.write(reinterpret_cast<const char*>(&bi_size_image), 4);
int32_t bi_x_pels_per_meter = 0;
file_.write(reinterpret_cast<const char*>(&bi_x_pels_per_meter), 4);
int32_t bi_y_pels_per_meter = 0;
file_.write(reinterpret_cast<const char*>(&bi_y_pels_per_meter), 4);
uint32_t bi_clr_used = 0;
file_.write(reinterpret_cast<const char*>(&bi_clr_used), 4);
uint32_t bi_clr_important = 0;
file_.write(reinterpret_cast<const char*>(&bi_clr_important), 4);
// Audio stream header (if enabled)
if (audio_enabled_) {
file_.write("LIST", 4);
uint32_t audio_strl_size = 92;
file_.write(reinterpret_cast<const char*>(&audio_strl_size), 4);
file_.write("strl", 4);
// Audio strh
file_.write("strh", 4);
uint32_t audio_strh_size = 56;
file_.write(reinterpret_cast<const char*>(&audio_strh_size), 4);
file_.write("auds", 4); // fccType
uint32_t audio_handler = 0;
file_.write(reinterpret_cast<const char*>(&audio_handler), 4);
uint32_t audio_stream_flags = 0;
file_.write(reinterpret_cast<const char*>(&audio_stream_flags), 4);
uint16_t audio_priority = 0;
file_.write(reinterpret_cast<const char*>(&audio_priority), 2);
uint16_t audio_language = 0;
file_.write(reinterpret_cast<const char*>(&audio_language), 2);
uint32_t audio_initial_frames = 0;
file_.write(reinterpret_cast<const char*>(&audio_initial_frames), 4);
uint32_t audio_scale = 1;
file_.write(reinterpret_cast<const char*>(&audio_scale), 4);
uint32_t audio_rate = sample_rate_;
file_.write(reinterpret_cast<const char*>(&audio_rate), 4);
uint32_t audio_start = 0;
file_.write(reinterpret_cast<const char*>(&audio_start), 4);
audio_samples_pos_ = file_.tellp();
uint32_t audio_length = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&audio_length), 4);
uint32_t audio_suggested_buffer_size = sample_rate_ * channels_ * 2; // 1 second buffer
file_.write(reinterpret_cast<const char*>(&audio_suggested_buffer_size), 4);
uint32_t audio_quality = 0;
file_.write(reinterpret_cast<const char*>(&audio_quality), 4);
uint32_t audio_sample_size = channels_ * 2; // 16-bit samples
file_.write(reinterpret_cast<const char*>(&audio_sample_size), 4);
uint32_t audio_reserved[2] = {0, 0};
file_.write(reinterpret_cast<const char*>(audio_reserved), 8);
// Audio strf (WAVEFORMATEX)
file_.write("strf", 4);
uint32_t audio_strf_size = 16;
file_.write(reinterpret_cast<const char*>(&audio_strf_size), 4);
uint16_t format_tag = 1; // PCM
file_.write(reinterpret_cast<const char*>(&format_tag), 2);
uint16_t audio_channels = channels_;
file_.write(reinterpret_cast<const char*>(&audio_channels), 2);
uint32_t samples_per_sec = sample_rate_;
file_.write(reinterpret_cast<const char*>(&samples_per_sec), 4);
uint32_t avg_bytes_per_sec = sample_rate_ * channels_ * 2;
file_.write(reinterpret_cast<const char*>(&avg_bytes_per_sec), 4);
uint16_t block_align = channels_ * 2;
file_.write(reinterpret_cast<const char*>(&block_align), 2);
uint16_t bits_per_sample = 16;
file_.write(reinterpret_cast<const char*>(&bits_per_sample), 2);
}
// LIST movi
file_.write("LIST", 4);
movi_size_pos_ = file_.tellp();
uint32_t movi_size = 0; // Will be updated later
file_.write(reinterpret_cast<const char*>(&movi_size), 4);
file_.write("movi", 4);
movi_list_pos_ = file_.tellp();
}
// Writes one frame as a '00dc' chunk and adds its size to total_video_size_.
//
// The frame is flipped vertically (the header declares a positive biHeight),
// 4-channel input is converted with COLOR_RGBA2BGR, and the result is resized
// to width_ x height_ when the dimensions differ. Frames that are empty, not
// 8-bit, or neither 3- nor 4-channel are dropped with an error log.
//
// NOTE(review): 3-channel input is written unchanged, so it is assumed to be
// BGR already, and 4-channel input is assumed to be RGBA - confirm with callers.
// NOTE(review): rows are written tightly packed (width_ * 3 bytes). Uncompressed
// DIB rows are normally padded to a 4-byte multiple; when width_ is not a
// multiple of 4 some players may show skewed frames - confirm before changing,
// since the header's size fields would have to change together with this.
void AVWriter::writeVideoFrameInternal(const cv::Mat& frame) {
    if (frame.empty()) return;

    // The stream format promises 24 bits per pixel; any other depth would
    // produce a chunk whose size and layout contradict the header.
    if (frame.depth() != CV_8U) {
        LOGE("Unsupported frame depth: %d", frame.depth());
        return;
    }

    // Convert frame to BGR if needed and flip vertically (AVI requirement)
    cv::Mat bgr_frame;
    switch (frame.channels()) {
        case 3:
            cv::flip(frame, bgr_frame, 0);  // Flip vertically
            break;
        case 4: {
            cv::Mat converted;
            cv::cvtColor(frame, converted, cv::COLOR_RGBA2BGR);
            cv::flip(converted, bgr_frame, 0);
            break;
        }
        default:
            LOGE("Unsupported frame format");
            return;
    }

    // Resize if necessary
    if (bgr_frame.cols != width_ || bgr_frame.rows != height_) {
        cv::resize(bgr_frame, bgr_frame, cv::Size(width_, height_));
    }

    // The pixel buffer is written with a single write() below, which is only
    // valid when the rows are stored back to back.
    if (!bgr_frame.isContinuous()) {
        bgr_frame = bgr_frame.clone();
    }

    // Write video chunk
    const uint32_t chunk_size =
        static_cast<uint32_t>(bgr_frame.total() * bgr_frame.elemSize());
    const uint32_t pad_size = chunk_size % 2;   // chunks are padded to an even size

    file_.write("00dc", 4);  // Video chunk ID
    file_.write(reinterpret_cast<const char*>(&chunk_size), 4);
    file_.write(reinterpret_cast<const char*>(bgr_frame.data), chunk_size);
    if (pad_size == 1) {
        const char pad = 0;
        file_.write(&pad, 1);
    }

    // 8 = chunk ID + size field.
    total_video_size_ += chunk_size + 8 + pad_size;
}
// Writes the given 16-bit samples as one '01wb' chunk and adds the chunk's
// on-disk size (8-byte chunk header + payload + optional pad byte) to
// total_audio_size_. An empty buffer is ignored.
void AVWriter::writeAudioChunk(const std::vector<short>& audioData) {
    if (audioData.empty()) {
        return;
    }

    const uint32_t payload_bytes = audioData.size() * sizeof(short);
    const uint32_t pad_bytes = payload_bytes % 2;  // chunks are padded to an even size

    // Chunk header: ID followed by the payload size.
    file_.write("01wb", 4);
    file_.write(reinterpret_cast<const char*>(&payload_bytes), 4);

    // Payload: the raw sample buffer.
    file_.write(reinterpret_cast<const char*>(audioData.data()), payload_bytes);

    if (pad_bytes == 1) {
        char zero = 0;
        file_.write(&zero, 1);
    }

    total_audio_size_ += payload_bytes + 8 + pad_bytes;
}
// Finalizes the headers, closes the file and marks the writer as closed.
// Calling close() on a writer that is not open is a no-op.
void AVWriter::close() {
    if (!is_open_) {
        return;
    }

    finalize();
    file_.close();
    is_open_ = false;

    // The counters' declared types are not visible in this file (the audio
    // counter accumulates a size_t), so convert explicitly to a type that
    // matches the format specifiers instead of relying on them being int.
    LOGI("Closed AV file: %s (%lld video frames, %lld audio samples)",
         filename_.c_str(),
         static_cast<long long>(video_frame_count_),
         static_cast<long long>(audio_sample_count_));
}
// Completes the file before it is closed. Currently this only back-patches
// the header fields via updateHeaders().
void AVWriter::finalize() {
updateHeaders();
}
void AVWriter::updateHeaders() {
std::streampos current_pos = file_.tellp();
// Update file size
file_.seekp(file_size_pos_);
uint32_t file_size = static_cast<uint32_t>(current_pos) - 8;
file_.write(reinterpret_cast<const char*>(&file_size), 4);
// Update total frames
file_.seekp(video_frames_pos_);
uint32_t total_frames = video_frame_count_;
file_.write(reinterpret_cast<const char*>(&total_frames), 4);
// Update video stream length (critical for correct duration calculation)
file_.seekp(video_length_pos_);
uint32_t video_length = video_frame_count_;
file_.write(reinterpret_cast<const char*>(&video_length), 4);
// Update audio samples if audio is enabled
if (audio_enabled_) {
file_.seekp(audio_samples_pos_);
uint32_t audio_length = audio_sample_count_;
file_.write(reinterpret_cast<const char*>(&audio_length), 4);
}
// Update movi size
file_.seekp(movi_size_pos_);
uint32_t movi_size = total_video_size_ + total_audio_size_ + 4; // +4 for "movi"
file_.write(reinterpret_cast<const char*>(&movi_size), 4);
// Restore position
file_.seekp(current_pos);
}