winlin

decoded audio and video. add avc file format doc

@@ -23,8 +23,11 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -23,8 +23,11 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 23
24 #include <srs_core_codec.hpp> 24 #include <srs_core_codec.hpp>
25 25
  26 +#include <string.h>
  27 +
26 #include <srs_core_error.hpp> 28 #include <srs_core_error.hpp>
27 #include <srs_core_stream.hpp> 29 #include <srs_core_stream.hpp>
  30 +#include <srs_core_log.hpp>
28 31
29 SrsCodec::SrsCodec() 32 SrsCodec::SrsCodec()
30 { 33 {
@@ -36,10 +39,9 @@ SrsCodec::SrsCodec() @@ -36,10 +39,9 @@ SrsCodec::SrsCodec()
36 video_codec_id = 0; 39 video_codec_id = 0;
37 audio_data_rate = 0; 40 audio_data_rate = 0;
38 audio_codec_id = 0; 41 audio_codec_id = 0;
39 - aac_sample_rate = 0;  
40 - sample_rate = 0;  
41 - sample_size = 0;  
42 - audio_channels = 0; 42 + sound_rate = 0;
  43 + sound_size = 0;
  44 + sound_type = 0;
43 profile = 0; 45 profile = 0;
44 level = 0; 46 level = 0;
45 avc_extra_size = 0; 47 avc_extra_size = 0;
@@ -58,44 +60,133 @@ SrsCodec::~SrsCodec() @@ -58,44 +60,133 @@ SrsCodec::~SrsCodec()
58 srs_freep(stream); 60 srs_freep(stream);
59 } 61 }
60 62
61 -int SrsCodec::parse_av_codec(bool is_video, int8_t* data, int size) 63 +int SrsCodec::parse_audio_codec(int8_t* data, int size)
62 { 64 {
63 int ret = ERROR_SUCCESS; 65 int ret = ERROR_SUCCESS;
64 66
65 if (!data || size <= 0) { 67 if (!data || size <= 0) {
  68 + srs_trace("no audio present, hls ignore it.");
66 return ret; 69 return ret;
67 } 70 }
68 71
69 if ((ret = stream->initialize((char*)data, size)) != ERROR_SUCCESS) { 72 if ((ret = stream->initialize((char*)data, size)) != ERROR_SUCCESS) {
70 return ret; 73 return ret;
71 } 74 }
  75 +
  76 + // audio decode
  77 + if (!stream->require(1)) {
  78 + ret = ERROR_HLS_DECODE_ERROR;
  79 + srs_error("hls decode audio sound_format failed. ret=%d", ret);
  80 + return ret;
  81 + }
72 82
73 - if (is_video) {  
74 - if (!stream->require(1)) {  
75 - return ret;  
76 - }  
77 -  
78 - int8_t frame_type = stream->read_1bytes();  
79 - int8_t codec_id = frame_type & 0x0f;  
80 - frame_type = (frame_type >> 4) & 0x0f;  
81 -  
82 - video_codec_id = codec_id;  
83 - if (codec_id != SrsCodecVideoAVC) {  
84 - return ret;  
85 - }  
86 -  
87 - if (!stream->require(4)) {  
88 - return ret; 83 + int8_t sound_format = stream->read_1bytes();
  84 +
  85 + sound_type = sound_format & 0x01;
  86 + sound_size = (sound_format >> 1) & 0x01;
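  87 + sound_rate = (sound_format >> 2) & 0x01; // NOTE(review): a 0x01 mask yields only 0 or 1, but SrsCodecAudioSampleRate defines 0..3; the field is presumably 2 bits wide (mask 0x03) - confirm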
  88 + sound_format = (sound_format >> 4) & 0x0f;
  89 +
  90 + audio_codec_id = sound_format;
  91 +
  92 + // only support aac
  93 + if (audio_codec_id != SrsCodecAudioAAC) {
  94 + ret = ERROR_HLS_DECODE_ERROR;
  95 + srs_error("hls only support audio aac codec. ret=%d", ret);
  96 + return ret;
  97 + }
  98 +
  99 + if (!stream->require(1)) {
  100 + ret = ERROR_HLS_DECODE_ERROR;
  101 + srs_error("hls decode audio aac_packet_type failed. ret=%d", ret);
  102 + return ret;
  103 + }
  104 +
  105 + int8_t aac_packet_type = stream->read_1bytes();
  106 +
  107 + if (aac_packet_type == SrsCodecAudioTypeSequenceHeader) {
  108 + // AudioSpecificConfig
  109 + // 1.6.2.1 AudioSpecificConfig, in aac-mp4a-format-ISO_IEC_14496-3+2001.pdf, page 33.
  110 + aac_extra_size = size - stream->pos();
  111 + if (aac_extra_size > 0) {
  112 + srs_freepa(aac_extra_data);
  113 + aac_extra_data = new char[aac_extra_size];
  114 + memcpy(aac_extra_data, data + stream->pos(), aac_extra_size);
89 } 115 }
90 - int8_t avc_packet_type = stream->read_1bytes();  
91 - int32_t composition_time = stream->read_3bytes();  
92 -  
93 - // 5.2.4.1.1 Syntax  
94 - if (avc_packet_type == SrsCodecVideoAVCTypeSequenceHeader) { 116 + } else if (aac_packet_type == SrsCodecAudioTypeRawData) {
  117 + // Raw AAC frame data in UI8 []
  118 + } else {
  119 + // ignored.
  120 + }
  121 +
  122 + srs_info("audio decoded, type=%d, codec=%d, asize=%d, rate=%d, format=%d, size=%d",
  123 + sound_type, audio_codec_id, sound_size, sound_rate, sound_format, size);
  124 +
  125 + return ret;
  126 +}
  127 +
  128 +int SrsCodec::parse_video_codec(int8_t* data, int size)
  129 +{
  130 + int ret = ERROR_SUCCESS;
  131 +
  132 + if (!data || size <= 0) {
  133 + srs_trace("no video present, hls ignore it.");
  134 + return ret;
  135 + }
  136 +
  137 + if ((ret = stream->initialize((char*)data, size)) != ERROR_SUCCESS) {
  138 + return ret;
  139 + }
  140 +
  141 + // video decode
  142 + if (!stream->require(1)) {
  143 + ret = ERROR_HLS_DECODE_ERROR;
  144 + srs_error("hls decode video frame_type failed. ret=%d", ret);
  145 + return ret;
  146 + }
  147 +
  148 + int8_t frame_type = stream->read_1bytes();
  149 + int8_t codec_id = frame_type & 0x0f;
  150 + frame_type = (frame_type >> 4) & 0x0f;
  151 +
  152 + video_codec_id = codec_id;
  153 + // only support h.264/avc
  154 + if (codec_id != SrsCodecVideoAVC) {
  155 + ret = ERROR_HLS_DECODE_ERROR;
  156 + srs_error("hls only support video h.264/avc codec. ret=%d", ret);
  157 + return ret;
  158 + }
  159 +
  160 + if (!stream->require(4)) {
  161 + ret = ERROR_HLS_DECODE_ERROR;
  162 + srs_error("hls decode video avc_packet_type failed. ret=%d", ret);
  163 + return ret;
  164 + }
  165 + int8_t avc_packet_type = stream->read_1bytes();
  166 + int32_t composition_time = stream->read_3bytes();
  167 +
  168 + // avoid a compiler warning; the value will be used in the future.
  169 + (void)composition_time;
  170 +
  171 + if (avc_packet_type == SrsCodecVideoAVCTypeSequenceHeader) {
  172 + // AVCDecoderConfigurationRecord
  173 + // 5.2.4.1.1 Syntax, H.264-AVC-ISO_IEC_14496-15.pdf, page 16
  174 + avc_extra_size = size - stream->pos();
  175 + if (avc_extra_size > 0) {
  176 + srs_freepa(avc_extra_data);
  177 + avc_extra_data = new char[avc_extra_size];
  178 + memcpy(avc_extra_data, data + stream->pos(), avc_extra_size);
95 } 179 }
  180 + } else if (avc_packet_type == SrsCodecVideoAVCTypeNALU){
  181 + // One or more NALUs (Full frames are required)
  182 + // 5.3.4.2.1 Syntax, H.264-AVC-ISO_IEC_14496-15.pdf, page 20
96 } else { 183 } else {
  184 + // ignored.
97 } 185 }
98 186
  187 + srs_info("video decoded, type=%d, codec=%d, avc=%d, time=%d, size=%d",
  188 + frame_type, video_codec_id, avc_packet_type, composition_time, size);
  189 +
99 return ret; 190 return ret;
100 } 191 }
101 192
@@ -128,6 +128,39 @@ enum SrsCodecAudioType @@ -128,6 +128,39 @@ enum SrsCodecAudioType
128 SrsCodecAudioTypeRawData = 1, 128 SrsCodecAudioTypeRawData = 1,
129 }; 129 };
130 130
  131 +// Sampling rate. The following values are defined:
  132 +// 0 = 5.5 kHz = 5512 Hz
  133 +// 1 = 11 kHz = 11025 Hz
  134 +// 2 = 22 kHz = 22050 Hz
  135 +// 3 = 44 kHz = 44100 Hz
  136 +enum SrsCodecAudioSampleRate
  137 +{
  138 + SrsCodecAudioSampleRate5512 = 0,
  139 + SrsCodecAudioSampleRate11025 = 1,
  140 + SrsCodecAudioSampleRate22050 = 2,
  141 + SrsCodecAudioSampleRate44100 = 3,
  142 +};
  143 +
  144 +// Size of each audio sample. This parameter only pertains to
  145 +// uncompressed formats. Compressed formats always decode
  146 +// to 16 bits internally.
  147 +// 0 = 8-bit samples
  148 +// 1 = 16-bit samples
  149 +enum SrsCodecAudioSampleSize
  150 +{
  151 + SrsCodecAudioSampleSize8bit = 0,
  152 + SrsCodecAudioSampleSize16bit = 1,
  153 +};
  154 +
  155 +// Mono or stereo sound
  156 +// 0 = Mono sound
  157 +// 1 = Stereo sound
  158 +enum SrsCodecAudioSoundType
  159 +{
  160 + SrsCodecAudioSoundTypeMono = 0,
  161 + SrsCodecAudioSoundTypeStereo = 1,
  162 +};
  163 +
131 /** 164 /**
132 * Annex E. The FLV File Format 165 * Annex E. The FLV File Format
133 */ 166 */
@@ -139,30 +172,35 @@ public: @@ -139,30 +172,35 @@ public:
139 /** 172 /**
140 * video specified 173 * video specified
141 */ 174 */
142 - int width;  
143 - int height;  
144 - int duration;  
145 - int frame_rate;  
146 // @see: SrsCodecVideo 175 // @see: SrsCodecVideo
147 int video_codec_id; 176 int video_codec_id;
148 - int video_data_rate; // in bps  
149 u_int8_t profile; // profile_idc, page 45. 177 u_int8_t profile; // profile_idc, page 45.
150 u_int8_t level; // level_idc, page 45. 178 u_int8_t level; // level_idc, page 45.
  179 + int width;
  180 + int height;
  181 + int video_data_rate; // in bps
  182 + int frame_rate;
  183 + int duration;
151 /** 184 /**
152 * audio specified 185 * audio specified
153 */ 186 */
  187 + // @see: the audio codec id enum (e.g. SrsCodecAudioAAC); SrsCodecAudioType is the AAC packet type.
154 int audio_codec_id; 188 int audio_codec_id;
  189 + // @see: SrsCodecAudioSampleRate
  190 + int sound_rate;
  191 + // @see: SrsCodecAudioSampleSize
  192 + int sound_size;
  193 + // @see: SrsCodecAudioSoundType
  194 + int sound_type;
155 int audio_data_rate; // in bps 195 int audio_data_rate; // in bps
156 - int aac_sample_rate;  
157 - int sample_rate; /* 5512, 11025, 22050, 44100 */  
158 - int sample_size; /* 1=8bit, 2=16bit */  
159 - int audio_channels; /* 1, 2 */  
160 // the avc extra data, the AVC sequence header, 196 // the avc extra data, the AVC sequence header,
161 - // without the flv codec header 197 + // without the flv codec header,
  198 + // @see: ffmpeg, AVCodecContext::extradata
162 int avc_extra_size; 199 int avc_extra_size;
163 char* avc_extra_data; 200 char* avc_extra_data;
164 // the aac extra data, the AAC sequence header, 201 // the aac extra data, the AAC sequence header,
165 - // without the flv codec header 202 + // without the flv codec header,
  203 + // @see: ffmpeg, AVCodecContext::extradata
166 int aac_extra_size; 204 int aac_extra_size;
167 char* aac_extra_data; 205 char* aac_extra_data;
168 public: 206 public:
@@ -170,7 +208,8 @@ public: @@ -170,7 +208,8 @@ public:
170 virtual ~SrsCodec(); 208 virtual ~SrsCodec();
171 // the following function used for hls to build the codec info. 209 // the following function used for hls to build the codec info.
172 public: 210 public:
173 - virtual int parse_av_codec(bool is_video, int8_t* data, int size); 211 + virtual int parse_audio_codec(int8_t* data, int size);
  212 + virtual int parse_video_codec(int8_t* data, int size);
174 // the following function used to figure out the flv/rtmp packet detail. 213 // the following function used to figure out the flv/rtmp packet detail.
175 public: 214 public:
176 /** 215 /**
@@ -107,4 +107,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -107,4 +107,7 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
107 // when open ssl sha256 digest key invalid size. 107 // when open ssl sha256 digest key invalid size.
108 #define ERROR_OpenSslSha256DigestSize 512 108 #define ERROR_OpenSslSha256DigestSize 512
109 109
  110 +#define ERROR_HLS_METADATA 600
  111 +#define ERROR_HLS_DECODE_ERROR 601
  112 +
110 #endif 113 #endif
@@ -53,11 +53,13 @@ int SrsHLS::on_meta_data(SrsOnMetaDataPacket* metadata) @@ -53,11 +53,13 @@ int SrsHLS::on_meta_data(SrsOnMetaDataPacket* metadata)
53 int ret = ERROR_SUCCESS; 53 int ret = ERROR_SUCCESS;
54 54
55 if (!metadata || !metadata->metadata) { 55 if (!metadata || !metadata->metadata) {
  56 + srs_trace("no metadata present, hls ignored it.");
56 return ret; 57 return ret;
57 } 58 }
58 59
59 SrsAmf0Object* obj = metadata->metadata; 60 SrsAmf0Object* obj = metadata->metadata;
60 if (obj->size() <= 0) { 61 if (obj->size() <= 0) {
  62 + srs_trace("no metadata present, hls ignored it.");
61 return ret; 63 return ret;
62 } 64 }
63 65
@@ -90,21 +92,38 @@ int SrsHLS::on_meta_data(SrsOnMetaDataPacket* metadata) @@ -90,21 +92,38 @@ int SrsHLS::on_meta_data(SrsOnMetaDataPacket* metadata)
90 codec->audio_data_rate = (int)(1000 * srs_amf0_convert<SrsAmf0Number>(prop)->value); 92 codec->audio_data_rate = (int)(1000 * srs_amf0_convert<SrsAmf0Number>(prop)->value);
91 } 93 }
92 if ((prop = obj->get_property("audiosamplerate")) != NULL && prop->is_number()) { 94 if ((prop = obj->get_property("audiosamplerate")) != NULL && prop->is_number()) {
93 - codec->sample_rate = (int)srs_amf0_convert<SrsAmf0Number>(prop)->value; 95 + int sound_rate = (int)srs_amf0_convert<SrsAmf0Number>(prop)->value;
  96 + if (sound_rate == 5512) {
  97 + codec->sound_rate = SrsCodecAudioSampleRate5512;
  98 + } else if (sound_rate == 11025) {
  99 + codec->sound_rate = SrsCodecAudioSampleRate11025;
  100 + } else if (sound_rate == 22050) {
  101 + codec->sound_rate = SrsCodecAudioSampleRate22050;
  102 + } else if (sound_rate == 44100) {
  103 + codec->sound_rate = SrsCodecAudioSampleRate44100;
  104 + } else {
  105 + ret = ERROR_HLS_METADATA;
  106 + srs_error("invalid sound_rate of metadata: %d, ret=%d", sound_rate, ret);
  107 + return ret;
  108 + }
94 } 109 }
95 if ((prop = obj->get_property("audiosamplesize")) != NULL && prop->is_number()) { 110 if ((prop = obj->get_property("audiosamplesize")) != NULL && prop->is_number()) {
96 - codec->sample_size = (int)srs_amf0_convert<SrsAmf0Number>(prop)->value;  
97 - if (codec->sample_size == 16) {  
98 - codec->sample_size = 2; 111 + int sound_size = (int)srs_amf0_convert<SrsAmf0Number>(prop)->value;
  112 + if (sound_size == 16) {
  113 + codec->sound_size = SrsCodecAudioSampleSize16bit;
  114 + } else if (sound_size == 8) {
  115 + codec->sound_size = SrsCodecAudioSampleSize8bit;
99 } else { 116 } else {
100 - codec->sample_size = 1; 117 + ret = ERROR_HLS_METADATA;
  118 + srs_error("invalid sound_size of metadata: %d, ret=%d", sound_size, ret);
  119 + return ret;
101 } 120 }
102 } 121 }
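103 if ((prop = obj->get_property("stereo")) != NULL && prop->is_number()) { 123 if ((prop = obj->get_property("stereo")) != NULL && prop->is_number()) { // NOTE(review): guarded by is_number() but the value is converted as SrsAmf0Boolean on the next line - the check presumably should test for a boolean; confirm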
104 if (srs_amf0_convert<SrsAmf0Boolean>(prop)->value) { 123 if (srs_amf0_convert<SrsAmf0Boolean>(prop)->value) {
105 - codec->audio_channels = 2; 124 + codec->sound_type = SrsCodecAudioSoundTypeStereo;
106 } else { 125 } else {
107 - codec->audio_channels = 1; 126 + codec->sound_type = SrsCodecAudioSoundTypeMono;
108 } 127 }
109 } 128 }
110 129
@@ -115,7 +134,7 @@ int SrsHLS::on_audio(SrsCommonMessage* audio) @@ -115,7 +134,7 @@ int SrsHLS::on_audio(SrsCommonMessage* audio)
115 { 134 {
116 int ret = ERROR_SUCCESS; 135 int ret = ERROR_SUCCESS;
117 136
118 - if ((ret = codec->parse_av_codec(false, audio->payload, audio->size)) != ERROR_SUCCESS) { 137 + if ((ret = codec->parse_audio_codec(audio->payload, audio->size)) != ERROR_SUCCESS) {
119 return ret; 138 return ret;
120 } 139 }
121 140
@@ -126,7 +145,7 @@ int SrsHLS::on_video(SrsCommonMessage* video) @@ -126,7 +145,7 @@ int SrsHLS::on_video(SrsCommonMessage* video)
126 { 145 {
127 int ret = ERROR_SUCCESS; 146 int ret = ERROR_SUCCESS;
128 147
129 - if ((ret = codec->parse_av_codec(true, video->payload, video->size)) != ERROR_SUCCESS) { 148 + if ((ret = codec->parse_video_codec(video->payload, video->size)) != ERROR_SUCCESS) {
130 return ret; 149 return ret;
131 } 150 }
132 151