胡斌

Part of refactoring the merge process: extract the audio/video merging logic from the free function process_files() into a MergeProcess class.

@@ -402,18 +402,44 @@ int merge_audio_file(vector<string> & files, const char * dest) @@ -402,18 +402,44 @@ int merge_audio_file(vector<string> & files, const char * dest)
402 return 0; 402 return 0;
403 } 403 }
404 404
405 -int process_files(const char * output_dest_file)  
406 -{  
407 - vector<fileinfo> & filesaudio = media_files[type_audio];  
408 - vector<fileinfo> & filesvideo = media_files[type_video]; 405 +class MergeProcess{
  406 +public:
  407 + MergeProcess(vector<fileinfo> & a, vector<fileinfo> & v);
  408 + int process_files(const char * output_dest_file);
  409 +
  410 +protected:
  411 + void init();
  412 + void adjust_va_timestamp();
  413 + void merge_left_audio();
  414 + int process_video_ahead_of_audio();
  415 + int process_video_behind_audio();
  416 + int process_video_align_audio();
  417 + int process_video_loop();
  418 + int process_va();
  419 +
  420 +protected:
  421 + vector<fileinfo> & filesaudio;
  422 + vector<fileinfo> & filesvideo;
409 vector<string > merged_files; 423 vector<string > merged_files;
410 vector<string> tmp_files; 424 vector<string> tmp_files;
411 - int nv = 0;  
412 - int nf = 0;  
413 - char destfile[1024],audio_file[1024],pic_file[1024]; 425 + int nv; // the index of processing video file
  426 + int nf;//the index of processing target merged ts
  427 + char destfile[1024], audio_file[1024], pic_file[1024];
414 char blank_pic_file[1024]; 428 char blank_pic_file[1024];
415 char silence_aac_file[1024]; 429 char silence_aac_file[1024];
  430 + int audio_index;
  431 + int audio_start;
  432 + fileinfo audio;
  433 + fileinfo video;
  434 +};
  435 +
  436 +MergeProcess::MergeProcess(vector<fileinfo> & a, vector<fileinfo> & v) :filesaudio(a), filesvideo(v)
  437 +{
  438 + init();
  439 +}
416 440
  441 +void MergeProcess::init()
  442 +{
417 strcpy(blank_pic_file, cfg_path); 443 strcpy(blank_pic_file, cfg_path);
418 strcat(blank_pic_file, "blank.jpg"); 444 strcat(blank_pic_file, "blank.jpg");
419 445
@@ -422,318 +448,365 @@ int process_files(const char * output_dest_file) @@ -422,318 +448,365 @@ int process_files(const char * output_dest_file)
422 448
423 check_audio_duration(); 449 check_audio_duration();
424 get_duration_from_video_file(); 450 get_duration_from_video_file();
425 - //don't split video, for a video, using merged audios to mix with it  
426 - //for audio, mix with video or jpg  
427 -  
428 - if (filesvideo.size()) {//has video files  
429 - if (filesaudio.size()){  
430 - for (int i = 0; i < filesaudio.size(); i++){ //  
431 - fileinfo audio = filesaudio[i];  
432 - float audio_start = audio.start_time; 451 + nv = 0;
  452 + nf = 0;
  453 + audio_index = 0;
  454 +}
433 455
434 - for (int j = i; j < filesaudio.size(); j++){  
435 - filesaudio[j].start_time -= audio_start;  
436 - filesaudio[j].end_time -= audio_start;  
437 - } 456 +void MergeProcess::adjust_va_timestamp()
  457 +{
  458 + fileinfo audio = filesaudio[audio_index];
  459 + float audio_start = audio.start_time;
438 460
439 - for (int j = nv; j < filesvideo.size(); j++) {  
440 - filesvideo[j].start_time -= audio_start;  
441 - filesvideo[j].end_time -= audio_start;  
442 - } 461 + for (int j = audio_index; j < filesaudio.size(); j++){
  462 + filesaudio[j].start_time -= audio_start;
  463 + filesaudio[j].end_time -= audio_start;
  464 + }
443 465
444 - audio = filesaudio[i]; 466 + for (int j = nv; j < filesvideo.size(); j++) {
  467 + filesvideo[j].start_time -= audio_start;
  468 + filesvideo[j].end_time -= audio_start;
  469 + }
  470 +}
445 471
446 - audio_start = 0;//for a new processing audio,the start is 0  
447 472
448 - for (; nv < filesvideo.size(); nv++) {  
449 - fileinfo video = filesvideo[nv]; 473 +void MergeProcess::merge_left_audio()
  474 +{
  475 + if (audio_start < audio.end_time){
  476 + sprintf(destfile, "%d_%s", nf, audio.name.c_str());
  477 + split_audio(audio.name.c_str(), audio_start, audio.end_time - audio_start, destfile);
  478 + tmp_files.push_back(destfile);
  479 +
  480 + sprintf(destfile, "%d.ts", nf);
  481 + if (nv < filesvideo.size()) {
  482 + fileinfo video = filesvideo[nv];
  483 + sprintf(pic_file, "%s.jpg", video.name.c_str());
  484 + get_video_first_frame_jpeg(video, pic_file);
  485 + tmp_files.push_back(pic_file);
  486 + }
  487 + else {
  488 + strcpy(pic_file, blank_pic_file);
  489 + }
  490 + merge_audio_pic(audio, nf, pic_file, destfile);
  491 + merged_files.push_back(destfile);
  492 + nf++;
  493 + }
  494 +}
450 495
451 - if (video.start_time < audio_start - 0.1)  
452 - {//video is much more ahead of audio,try padding silence first  
453 - if (video.end_time < audio_start + 0.1) {  
454 - sprintf(destfile, "%d_silence.aac", nf);  
455 - split_audio(silence_aac_file, 0, video.end_time - video.start_time, destfile);  
456 - tmp_files.push_back(destfile);  
457 - sprintf(destfile, "%d.ts", nf);  
458 - merge_audio_video(destfile, video.name.c_str(), destfile);  
459 - merged_files.push_back(destfile);  
460 - nf++;  
461 - continue; //for next video  
462 - }  
463 - else {  
464 - // combine a audio file for the video  
465 - double silence_audio_start = audio.end_time;//maybe need append silence  
466 - double silence_audio_end = video.end_time;  
467 -  
468 - bool need_append_silence = true;  
469 - bool to_next_video = false;  
470 -  
471 - vector<std::string > merge_audio_files;  
472 - sprintf(destfile, "%d_0_silence.aac", nf);//a duration of silence  
473 - split_audio(silence_aac_file, 0, audio_start - video.start_time, destfile);  
474 - merge_audio_files.push_back(destfile);  
475 - tmp_files.push_back(destfile);  
476 -  
477 - if (audio.end_time < video.end_time + 0.1 && audio.end_time > video.end_time - 0.1) {  
478 - merge_audio_files.push_back(audio.name); //whole audio file,just fit  
479 - audio_start = audio.end_time + 0.1;  
480 - need_append_silence = false;  
481 - }  
482 - else if (audio.end_time > video.end_time){ //split part of audio file  
483 - sprintf(destfile, "%d_%s", nf, audio.name.c_str());  
484 - merge_audio_files.push_back(destfile);  
485 - tmp_files.push_back(destfile);  
486 - split_audio(audio.name.c_str(), audio_start, video.end_time - audio_start, destfile);  
487 - audio_start = video.end_time;  
488 - need_append_silence = false;  
489 - }  
490 - else {  
491 - merge_audio_files.push_back(audio.name);  
492 -  
493 - for (; i + 1 < filesaudio.size(); i++){//since video is not finished,try find next audio  
494 - audio = filesaudio[i + 1];  
495 - if (audio.start_time < video.end_time) {//next audio should split to fit the video  
496 - silence_audio_end = audio.start_time;  
497 -  
498 - sprintf(destfile, "%d_%d_silence.aac", nf, i);  
499 - split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);  
500 - merge_audio_files.push_back(destfile);  
501 - tmp_files.push_back(destfile);  
502 -  
503 - if (audio.end_time > video.end_time - 0.1 && audio.end_time < video.end_time + 0.1) {//just match  
504 - merge_audio_files.push_back(audio.name);  
505 - need_append_silence = false;  
506 - audio_start = audio.end_time + 0.1;  
507 - i++;//this audio is used  
508 - break;  
509 - }  
510 - if (audio.end_time > video.end_time){  
511 - sprintf(destfile, "%d_%s", nf, audio.name.c_str());  
512 - merge_audio_files.push_back(destfile);  
513 - tmp_files.push_back(destfile);  
514 - split_audio(audio.name.c_str(), 0, video.end_time - audio.start_time, destfile);  
515 - need_append_silence = false;  
516 - //adjust timecode for the audio is part left  
517 - float cur_audio_start = video.end_time - audio.start_time;  
518 -  
519 - audio_start = audio.start_time;  
520 -  
521 - for (int j = i + 1; j < filesaudio.size(); j++){  
522 - filesaudio[j].start_time -= audio_start;  
523 - filesaudio[j].end_time -= audio_start;  
524 - }  
525 -  
526 - for (int j = nv; j < filesvideo.size(); j++) {  
527 - filesvideo[j].start_time -= audio_start;  
528 - filesvideo[j].end_time -= audio_start;  
529 - }  
530 - i++;  
531 - audio = filesaudio[i];  
532 - audio_start = cur_audio_start;  
533 -  
534 - to_next_video = true;  
535 -  
536 - break;  
537 - }  
538 - merge_audio_files.push_back(audio.name);//whole audio should be appended  
539 - silence_audio_start = audio.end_time; //adjust the silence start  
540 - }  
541 - else {  
542 - break;//no need for next audio  
543 - }  
544 - }//end audio find for the video  
545 - }//end else  
546 -  
547 - if (need_append_silence) {  
548 - sprintf(destfile, "%d_silence.aac", nf);  
549 - split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);  
550 - merge_audio_files.push_back(destfile);  
551 - tmp_files.push_back(destfile);  
552 - }  
553 -  
554 - sprintf(audio_file, "%d_merged.aac", nf);  
555 - merge_audio_file(merge_audio_files, audio_file);  
556 -  
557 -  
558 - sprintf(destfile, "%d.ts", nf);  
559 - merge_audio_video(audio_file, video.name.c_str(), destfile);  
560 - merged_files.push_back(destfile);  
561 - nf++;  
562 -  
563 - if (!to_next_video){  
564 - nv++;//this video is processed  
565 - break;  
566 - }  
567 - }//end need combine  
568 - }//end video is ahead of audio  
569 - //-----VS-----  
570 - //AS----------  
571 - if (video.start_time - audio_start > 0.1) {//video is behind audio too much  
572 - sprintf(audio_file, "%d_%s", nf, audio.name.c_str());  
573 - if (video.start_time < audio.end_time - 0.1){  
574 - split_audio(audio.name.c_str(), audio_start, video.start_time - audio_start, audio_file);  
575 - audio_start = video.start_time;  
576 - }  
577 - else {  
578 - split_audio(audio.name.c_str(), audio_start, audio.end_time - audio_start, audio_file);  
579 - }  
580 - tmp_files.push_back(audio_file); 496 +int MergeProcess::process_video_ahead_of_audio()
  497 +{
  498 + if (video.end_time < audio_start + 0.1) {
  499 + sprintf(destfile, "%d_silence.aac", nf);
  500 + split_audio(silence_aac_file, 0, video.end_time - video.start_time, destfile);
  501 + tmp_files.push_back(destfile);
  502 + sprintf(destfile, "%d.ts", nf);
  503 + merge_audio_video(destfile, video.name.c_str(), destfile);
  504 + merged_files.push_back(destfile);
  505 + nf++;
  506 + return 0; //for next video
  507 + }
  508 + else {
  509 + // combine a audio file for the video
  510 + double silence_audio_start = audio.end_time;//maybe need append silence
  511 + double silence_audio_end = video.end_time;
  512 +
  513 + bool need_append_silence = true;
  514 + bool to_next_video = false;
  515 +
  516 + vector<std::string > merge_audio_files;
  517 + sprintf(destfile, "%d_0_silence.aac", nf);//a duration of silence
  518 + split_audio(silence_aac_file, 0, audio_start - video.start_time, destfile);
  519 + merge_audio_files.push_back(destfile);
  520 + tmp_files.push_back(destfile);
  521 +
  522 + if (audio.end_time < video.end_time + 0.1 && audio.end_time > video.end_time - 0.1) {
  523 + merge_audio_files.push_back(audio.name); //whole audio file,just fit
  524 + audio_start = audio.end_time + 0.1;
  525 + need_append_silence = false;
  526 + }
  527 + else if (audio.end_time > video.end_time){ //split part of audio file
  528 + sprintf(destfile, "%d_%s", nf, audio.name.c_str());
  529 + merge_audio_files.push_back(destfile);
  530 + tmp_files.push_back(destfile);
  531 + split_audio(audio.name.c_str(), audio_start, video.end_time - audio_start, destfile);
  532 + audio_start = video.end_time;
  533 + need_append_silence = false;
  534 + }
  535 + else {
  536 + merge_audio_files.push_back(audio.name);
581 537
582 - sprintf(pic_file, "%s.jpg", video.name.c_str());  
583 - get_video_first_frame_jpeg(video, pic_file);  
584 - tmp_files.push_back(pic_file); 538 + for (; audio_index + 1 < filesaudio.size(); audio_index++){//since video is not finished,try find next audio
  539 + audio = filesaudio[audio_index + 1];
  540 + if (audio.start_time < video.end_time) {//next audio should split to fit the video
  541 + silence_audio_end = audio.start_time;
585 542
586 - sprintf(destfile, "%d.ts", nf);  
587 - merge_audio_pic(audio_file, pic_file, destfile);  
588 - merged_files.push_back(destfile);  
589 - nf++; 543 + sprintf(destfile, "%d_%d_silence.aac", nf, audio_index);
  544 + split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);
  545 + merge_audio_files.push_back(destfile);
  546 + tmp_files.push_back(destfile);
590 547
591 - if (video.start_time >= audio.end_time - 0.1){//all audio file no video, to next audio  
592 - audio_start = audio.end_time + 0.1;//no audio left  
593 - break;  
594 - } 548 + if (audio.end_time > video.end_time - 0.1 && audio.end_time < video.end_time + 0.1) {//just match
  549 + merge_audio_files.push_back(audio.name);
  550 + need_append_silence = false;
  551 + audio_start = audio.end_time + 0.1;
  552 + audio_index++;//this audio is used
  553 + break;
595 } 554 }
596 - //----AS--------  
597 - //----VS--------  
598 - else if (audio_start - video.start_time < 0.1){  
599 -  
600 - if (audio.end_time > video.end_time){ //this video finish, to next video  
601 - sprintf(destfile, "%d_%s", nf, audio.name.c_str());  
602 - split_audio(audio.name.c_str(), video.start_time, video.end_time - video.start_time, destfile);  
603 - tmp_files.push_back(destfile);  
604 -  
605 - audio_start = video.end_time;  
606 - sprintf(destfile, "%d.ts", nf);  
607 - merge_audio_video(audio, nf, video, destfile);  
608 - merged_files.push_back(destfile);  
609 - nf++;  
610 - }  
611 - else if (video.end_time - audio.end_time < 0.1){//just fine, this audio file finish  
612 - sprintf(destfile, "%d_%s", nf, audio.name.c_str());  
613 - split_audio(audio.name.c_str(), video.start_time, audio.end_time - video.start_time, destfile);  
614 - tmp_files.push_back(destfile);  
615 -  
616 - sprintf(destfile, "%d.ts", nf);  
617 - merge_audio_video(audio, nf, video, destfile);  
618 - merged_files.push_back(destfile);  
619 - audio_start = audio.end_time + 0.1;//no audio left  
620 - nf++;  
621 - nv++;//this video is used  
622 - break; 555 + if (audio.end_time > video.end_time){
  556 + sprintf(destfile, "%d_%s", nf, audio.name.c_str());
  557 + merge_audio_files.push_back(destfile);
  558 + tmp_files.push_back(destfile);
  559 + split_audio(audio.name.c_str(), 0, video.end_time - audio.start_time, destfile);
  560 + need_append_silence = false;
  561 + //adjust timecode for the audio is part left
  562 + float cur_audio_start = video.end_time - audio.start_time;
  563 +
  564 + audio_start = audio.start_time;
  565 +
  566 + for (int j = audio_index + 1; j < filesaudio.size(); j++){
  567 + filesaudio[j].start_time -= audio_start;
  568 + filesaudio[j].end_time -= audio_start;
623 } 569 }
624 - else { // this audio finish,add silence and/or next audio  
625 - sprintf(destfile, "%d_%s", nf, audio.name.c_str());  
626 - split_audio(audio.name.c_str(), video.start_time, audio.end_time - video.start_time, destfile);  
627 - vector<std::string > merge_audio_files;  
628 - merge_audio_files.push_back(destfile);  
629 - tmp_files.push_back(destfile);  
630 -  
631 - audio_start = audio.end_time + 0.1;  
632 -  
633 - double silence_audio_start = audio.end_time;  
634 - double silence_audio_end = video.end_time;  
635 -  
636 - bool need_silence = true;  
637 - bool to_next_video = false;  
638 - for (; i + 1 < filesaudio.size(); i++){//since video is not finished,try find next audio  
639 - audio = filesaudio[i + 1];  
640 - if (audio.start_time < video.end_time) {//next audio should split to fit the video  
641 - silence_audio_end = audio.start_time;  
642 -  
643 - sprintf(destfile, "%d_%d_silence.aac", nf, i);  
644 - split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);  
645 - merge_audio_files.push_back(destfile);  
646 - tmp_files.push_back(destfile);  
647 -  
648 - if (audio.end_time > video.end_time - 0.1 && audio.end_time < video.end_time + 0.1) {//just match  
649 - merge_audio_files.push_back(audio.name);  
650 - need_silence = false;  
651 - audio_start = audio.end_time + 0.1;  
652 - i++;  
653 - break;  
654 - }  
655 - if (audio.end_time > video.end_time){  
656 - sprintf(destfile, "%d_%s", nf, audio.name.c_str());  
657 - merge_audio_files.push_back(destfile);  
658 - tmp_files.push_back(destfile);  
659 - split_audio(audio.name.c_str(), 0, video.end_time - audio.start_time, destfile);  
660 - need_silence = false;  
661 - //adjust timecode for the audio is part left  
662 - float cur_audio_start = video.end_time - audio.start_time;  
663 -  
664 - audio_start = audio.start_time;  
665 -  
666 - for (int j = i + 1; j < filesaudio.size(); j++){  
667 - filesaudio[j].start_time -= audio_start;  
668 - filesaudio[j].end_time -= audio_start;  
669 - }  
670 -  
671 - for (int j = nv; j < filesvideo.size(); j++) {  
672 - filesvideo[j].start_time -= audio_start;  
673 - filesvideo[j].end_time -= audio_start;  
674 - }  
675 - i++;  
676 - audio = filesaudio[i];  
677 - audio_start = cur_audio_start;  
678 -  
679 - to_next_video = true;  
680 -  
681 - break;  
682 - }  
683 - merge_audio_files.push_back(audio.name);//whole audio should be appended  
684 - silence_audio_start = audio.end_time; //adjust the silence start  
685 - audio_start = audio.end_time + 0.1;  
686 - }  
687 - else {  
688 - break;//no need for next audio  
689 - }  
690 - }  
691 -  
692 - if (need_silence) {  
693 - sprintf(destfile, "%d_silence.aac", nf);  
694 - split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);  
695 - merge_audio_files.push_back(destfile);  
696 - tmp_files.push_back(destfile);  
697 - }  
698 -  
699 - sprintf(audio_file, "%d_merged.aac", nf);  
700 - merge_audio_file(merge_audio_files, audio_file);  
701 -  
702 -  
703 - sprintf(destfile, "%d.ts", nf);  
704 - merge_audio_video(audio_file, video.name.c_str(), destfile);  
705 - merged_files.push_back(destfile);  
706 - nf++;  
707 -  
708 - if (!to_next_video){  
709 - nv++;  
710 - break;  
711 - } 570 +
  571 + for (int j = nv; j < filesvideo.size(); j++) {
  572 + filesvideo[j].start_time -= audio_start;
  573 + filesvideo[j].end_time -= audio_start;
712 } 574 }
  575 + audio_index++;
  576 + audio = filesaudio[audio_index];
  577 + audio_start = cur_audio_start;
  578 +
  579 + to_next_video = true;
  580 +
  581 + break;
713 } 582 }
  583 + merge_audio_files.push_back(audio.name);//whole audio should be appended
  584 + silence_audio_start = audio.end_time; //adjust the silence start
  585 + }
  586 + else {
  587 + break;//no need for next audio
714 } 588 }
  589 + }//end audio find for the video
  590 + }//end else
  591 +
  592 + if (need_append_silence) {
  593 + sprintf(destfile, "%d_silence.aac", nf);
  594 + split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);
  595 + merge_audio_files.push_back(destfile);
  596 + tmp_files.push_back(destfile);
  597 + }
  598 +
  599 + sprintf(audio_file, "%d_merged.aac", nf);
  600 + merge_audio_file(merge_audio_files, audio_file);
  601 +
  602 +
  603 + sprintf(destfile, "%d.ts", nf);
  604 + merge_audio_video(audio_file, video.name.c_str(), destfile);
  605 + merged_files.push_back(destfile);
  606 + nf++;
  607 +
  608 + if (!to_next_video){
  609 + nv++;//this video is processed
  610 + return 1;
  611 + }
  612 + }//end need combine
  613 + return 0;
  614 +}
  615 +
  616 +int MergeProcess::process_video_behind_audio()
  617 +{
  618 + sprintf(audio_file, "%d_%s", nf, audio.name.c_str());
  619 + if (video.start_time < audio.end_time - 0.1){
  620 + split_audio(audio.name.c_str(), audio_start, video.start_time - audio_start, audio_file);
  621 + audio_start = video.start_time;
  622 + }
  623 + else {
  624 + split_audio(audio.name.c_str(), audio_start, audio.end_time - audio_start, audio_file);
  625 + }
  626 + tmp_files.push_back(audio_file);
  627 +
  628 + sprintf(pic_file, "%s.jpg", video.name.c_str());
  629 + get_video_first_frame_jpeg(video, pic_file);
  630 + tmp_files.push_back(pic_file);
715 631
716 - if (audio_start < audio.end_time){ 632 + sprintf(destfile, "%d.ts", nf);
  633 + merge_audio_pic(audio_file, pic_file, destfile);
  634 + merged_files.push_back(destfile);
  635 + nf++;
  636 +
  637 + if (video.start_time >= audio.end_time - 0.1){//all audio file no video, to next audio
  638 + audio_start = audio.end_time + 0.1;//no audio left
  639 + return 1;
  640 + }
  641 + return 0;
  642 +}
  643 +
  644 +int MergeProcess::process_video_align_audio()
  645 +{
  646 + if (audio.end_time > video.end_time){ //this video finish, to next video
  647 + sprintf(destfile, "%d_%s", nf, audio.name.c_str());
  648 + split_audio(audio.name.c_str(), video.start_time, video.end_time - video.start_time, destfile);
  649 + tmp_files.push_back(destfile);
  650 +
  651 + audio_start = video.end_time;
  652 + sprintf(destfile, "%d.ts", nf);
  653 + merge_audio_video(audio, nf, video, destfile);
  654 + merged_files.push_back(destfile);
  655 + nf++;
  656 + }
  657 + else if (video.end_time - audio.end_time < 0.1){//just fine, this audio file finish
  658 + sprintf(destfile, "%d_%s", nf, audio.name.c_str());
  659 + split_audio(audio.name.c_str(), video.start_time, audio.end_time - video.start_time, destfile);
  660 + tmp_files.push_back(destfile);
  661 +
  662 + sprintf(destfile, "%d.ts", nf);
  663 + merge_audio_video(audio, nf, video, destfile);
  664 + merged_files.push_back(destfile);
  665 + audio_start = audio.end_time + 0.1;//no audio left
  666 + nf++;
  667 + nv++;//this video is used
  668 + return 1;
  669 + }
  670 + else { // this audio finish,add silence and/or next audio
  671 + sprintf(destfile, "%d_%s", nf, audio.name.c_str());
  672 + split_audio(audio.name.c_str(), video.start_time, audio.end_time - video.start_time, destfile);
  673 + vector<std::string > merge_audio_files;
  674 + merge_audio_files.push_back(destfile);
  675 + tmp_files.push_back(destfile);
  676 +
  677 + audio_start = audio.end_time + 0.1;
  678 +
  679 + double silence_audio_start = audio.end_time;
  680 + double silence_audio_end = video.end_time;
  681 +
  682 + bool need_silence = true;
  683 + bool to_next_video = false;
  684 + for (; audio_index + 1 < filesaudio.size(); audio_index++){//since video is not finished,try find next audio
  685 + audio = filesaudio[audio_index + 1];
  686 + if (audio.start_time < video.end_time) {//next audio should split to fit the video
  687 + silence_audio_end = audio.start_time;
  688 +
  689 + sprintf(destfile, "%d_%d_silence.aac", nf, audio_index);
  690 + split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);
  691 + merge_audio_files.push_back(destfile);
  692 + tmp_files.push_back(destfile);
  693 +
  694 + if (audio.end_time > video.end_time - 0.1 && audio.end_time < video.end_time + 0.1) {//just match
  695 + merge_audio_files.push_back(audio.name);
  696 + need_silence = false;
  697 + audio_start = audio.end_time + 0.1;
  698 + audio_index++;
  699 + break;
  700 + }
  701 + if (audio.end_time > video.end_time){
717 sprintf(destfile, "%d_%s", nf, audio.name.c_str()); 702 sprintf(destfile, "%d_%s", nf, audio.name.c_str());
718 - split_audio(audio.name.c_str(), audio_start, audio.end_time - audio_start, destfile); 703 + merge_audio_files.push_back(destfile);
719 tmp_files.push_back(destfile); 704 tmp_files.push_back(destfile);
  705 + split_audio(audio.name.c_str(), 0, video.end_time - audio.start_time, destfile);
  706 + need_silence = false;
  707 + //adjust timecode for the audio is part left
  708 + float cur_audio_start = video.end_time - audio.start_time;
720 709
721 - sprintf(destfile, "%d.ts", nf);  
722 - if (nv < filesvideo.size()) {  
723 - fileinfo video = filesvideo[nv];  
724 - sprintf(pic_file, "%s.jpg", video.name.c_str());  
725 - get_video_first_frame_jpeg(video, pic_file);  
726 - tmp_files.push_back(pic_file); 710 + audio_start = audio.start_time;
  711 +
  712 + for (int j = audio_index + 1; j < filesaudio.size(); j++){
  713 + filesaudio[j].start_time -= audio_start;
  714 + filesaudio[j].end_time -= audio_start;
727 } 715 }
728 - else {  
729 - strcpy(pic_file, blank_pic_file); 716 +
  717 + for (int j = nv; j < filesvideo.size(); j++) {
  718 + filesvideo[j].start_time -= audio_start;
  719 + filesvideo[j].end_time -= audio_start;
730 } 720 }
731 - merge_audio_pic(audio, nf, pic_file, destfile);  
732 - merged_files.push_back(destfile);  
733 - nf++; 721 + audio_index++;
  722 + audio = filesaudio[audio_index];
  723 + audio_start = cur_audio_start;
  724 +
  725 + to_next_video = true;
  726 +
  727 + break;
734 } 728 }
  729 + merge_audio_files.push_back(audio.name);//whole audio should be appended
  730 + silence_audio_start = audio.end_time; //adjust the silence start
  731 + audio_start = audio.end_time + 0.1;
  732 + }
  733 + else {
  734 + break;//no need for next audio
735 } 735 }
736 } 736 }
  737 +
  738 + if (need_silence) {
  739 + sprintf(destfile, "%d_silence.aac", nf);
  740 + split_audio(silence_aac_file, 0, silence_audio_end - silence_audio_start, destfile);
  741 + merge_audio_files.push_back(destfile);
  742 + tmp_files.push_back(destfile);
  743 + }
  744 +
  745 + sprintf(audio_file, "%d_merged.aac", nf);
  746 + merge_audio_file(merge_audio_files, audio_file);
  747 +
  748 +
  749 + sprintf(destfile, "%d.ts", nf);
  750 + merge_audio_video(audio_file, video.name.c_str(), destfile);
  751 + merged_files.push_back(destfile);
  752 + nf++;
  753 +
  754 + if (!to_next_video){
  755 + nv++;
  756 + return 1;
  757 + }
  758 + }
  759 + return 0;
  760 +}
  761 +
  762 +int MergeProcess::process_video_loop()
  763 +{
  764 + for (; nv < filesvideo.size(); nv++) {
  765 + video = filesvideo[nv];
  766 +
  767 + if (video.start_time < audio_start - 0.1)
  768 + {//video is much more ahead of audio,try padding silence first
  769 + if (process_video_ahead_of_audio())
  770 + break;
  771 + }//end video is ahead of audio
  772 + if (video.start_time - audio_start > 0.1) {//video is behind audio too much
  773 + if (process_video_behind_audio())
  774 + break;
  775 + }
  776 + else if (audio_start - video.start_time < 0.1){
  777 + if (process_video_align_audio())
  778 + break;
  779 + }
  780 + }
  781 + return 0;
  782 +}
  783 +
  784 +int MergeProcess::process_va()
  785 +{
  786 + for (audio_index = 0; audio_index < filesaudio.size(); audio_index++){ //
  787 +
  788 + adjust_va_timestamp();
  789 +
  790 + audio = filesaudio[audio_index];
  791 + audio_start = 0;//for a new processing audio,the start is 0
  792 +
  793 + process_video_loop();
  794 +
  795 + merge_left_audio();
  796 + }
  797 + return 0;
  798 +}
  799 +
  800 +
  801 +int MergeProcess::process_files(const char * output_dest_file)
  802 +{
  803 + //don't split video, for a video, using merged audios to mix with it
  804 + //for audio, mix with video or jpg
  805 +
  806 + if (filesvideo.size()) {//has video files
  807 + if (filesaudio.size()){
  808 + process_va(); //process the case both audio and video files exist
  809 + }
737 else{//only video 810 else{//only video
738 if (filesvideo.size() == 1){ 811 if (filesvideo.size() == 1){
739 fileinfo video = filesvideo[0]; 812 fileinfo video = filesvideo[0];
@@ -772,7 +845,6 @@ int process_files(const char * output_dest_file) @@ -772,7 +845,6 @@ int process_files(const char * output_dest_file)
772 845
773 adjust_dest_timecode("m.ts", output_dest_file); 846 adjust_dest_timecode("m.ts", output_dest_file);
774 847
775 -  
776 if (!keep_tmp_files) { 848 if (!keep_tmp_files) {
777 removefiles(tmp_files); 849 removefiles(tmp_files);
778 removefiles(merged_files); 850 removefiles(merged_files);
@@ -866,7 +938,8 @@ int main(int argc, char * argv[]) @@ -866,7 +938,8 @@ int main(int argc, char * argv[])
866 938
867 load_codec_param(); 939 load_codec_param();
868 940
869 - process_files("dest.ts"); 941 + MergeProcess mp(media_files[type_audio], media_files[type_video]);
  942 + mp.process_files("dest.ts");
870 943
871 return 0; 944 return 0;
872 } 945 }