winlin

for #738, add isom boxes.

@@ -23,6 +23,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -23,6 +23,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 23
24 #include <srs_kernel_mp4.hpp> 24 #include <srs_kernel_mp4.hpp>
25 25
  26 +#include <string.h>
  27 +
26 SrsMp4Box::SrsMp4Box() 28 SrsMp4Box::SrsMp4Box()
27 { 29 {
28 size = 0; 30 size = 0;
@@ -56,6 +58,27 @@ SrsMp4FileTypeBox::~SrsMp4FileTypeBox() @@ -56,6 +58,27 @@ SrsMp4FileTypeBox::~SrsMp4FileTypeBox()
56 srs_freepa(compatible_brands); 58 srs_freepa(compatible_brands);
57 } 59 }
58 60
  61 +SrsMp4MediaDataBox::SrsMp4MediaDataBox()
  62 +{
  63 + type = 0x6d646174; // 'mdat'
  64 + data = NULL;
  65 + nb_data = 0;
  66 +}
  67 +
  68 +SrsMp4MediaDataBox::~SrsMp4MediaDataBox()
  69 +{
  70 + srs_freepa(data);
  71 +}
  72 +
  73 +SrsMp4FreeSpaceBox::SrsMp4FreeSpaceBox()
  74 +{
  75 + type = 0x66726565; // ‘free’ or ‘skip’
  76 +}
  77 +
  78 +SrsMp4FreeSpaceBox::~SrsMp4FreeSpaceBox()
  79 +{
  80 +}
  81 +
59 SrsMp4MovieBox::SrsMp4MovieBox() 82 SrsMp4MovieBox::SrsMp4MovieBox()
60 { 83 {
61 type = 0x6d6f6f76; // 'moov' 84 type = 0x6d6f6f76; // 'moov'
@@ -68,9 +91,275 @@ SrsMp4MovieBox::~SrsMp4MovieBox() @@ -68,9 +91,275 @@ SrsMp4MovieBox::~SrsMp4MovieBox()
68 SrsMp4MovieHeaderBox::SrsMp4MovieHeaderBox() 91 SrsMp4MovieHeaderBox::SrsMp4MovieHeaderBox()
69 { 92 {
70 type = 0x6d766864; // 'mvhd' 93 type = 0x6d766864; // 'mvhd'
  94 +
  95 + rate = 0x00010000; // typically 1.0
  96 + volume = 0x0100; // typically, full volume
  97 + reserved0 = 0;
  98 + reserved1 = 0;
  99 +
  100 + int32_t v[] = {0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000};
  101 + memcpy(matrix, v, 36);
  102 +
  103 + memset(pre_defined, 0, 24);
71 } 104 }
72 105
73 SrsMp4MovieHeaderBox::~SrsMp4MovieHeaderBox() 106 SrsMp4MovieHeaderBox::~SrsMp4MovieHeaderBox()
74 { 107 {
75 } 108 }
76 109
  110 +SrsMp4TrackBox::SrsMp4TrackBox()
  111 +{
  112 + type = 0x7472616b; // 'trak'
  113 +}
  114 +
  115 +SrsMp4TrackBox::~SrsMp4TrackBox()
  116 +{
  117 +}
  118 +
  119 +SrsMp4TrackHeaderBox::SrsMp4TrackHeaderBox()
  120 +{
  121 + type = 0x746b6864; // 'tkhd'
  122 +
  123 + reserved0 = 0;
  124 + reserved1 = 0;
  125 + reserved2 = 0;
  126 + layer = alternate_group = 0;
  127 + volume = 0x0100; // if track_is_audio 0x0100 else 0
  128 +
  129 + int32_t v[] = {0x00010000, 0, 0, 0, 0x00010000, 0, 0, 0, 0x40000000};
  130 + memcpy(matrix, v, 36);
  131 +}
  132 +
  133 +SrsMp4TrackHeaderBox::~SrsMp4TrackHeaderBox()
  134 +{
  135 +}
  136 +
  137 +SrsMp4EditBox::SrsMp4EditBox()
  138 +{
  139 + type = 0x65647473; // 'edts'
  140 +}
  141 +
  142 +SrsMp4EditBox::~SrsMp4EditBox()
  143 +{
  144 +}
  145 +
  146 +SrsMp4ElstEntry::SrsMp4ElstEntry()
  147 +{
  148 + media_rate_fraction = 0;
  149 +}
  150 +
  151 +SrsMp4EditListBox::SrsMp4EditListBox()
  152 +{
  153 + type = 0x656c7374; // 'elst'
  154 +
  155 + entry_count = 0;
  156 + entries = NULL;
  157 +}
  158 +
  159 +SrsMp4EditListBox::~SrsMp4EditListBox()
  160 +{
  161 + srs_freepa(entries);
  162 +}
  163 +
  164 +SrsMp4MediaBox::SrsMp4MediaBox()
  165 +{
  166 + type = 0x6d646961; // 'mdia'
  167 +}
  168 +
  169 +SrsMp4MediaBox::~SrsMp4MediaBox()
  170 +{
  171 +}
  172 +
  173 +SrsMp4MediaHeaderBox::SrsMp4MediaHeaderBox()
  174 +{
  175 + type = 0x6d646864; // 'mdhd'
  176 +
  177 + pad = 0;
  178 + pre_defined = 0;
  179 +}
  180 +
  181 +SrsMp4MediaHeaderBox::~SrsMp4MediaHeaderBox()
  182 +{
  183 +}
  184 +
  185 +SrsMp4HandlerReferenceBox::SrsMp4HandlerReferenceBox()
  186 +{
  187 + type = 0x68646c72; // 'hdlr'
  188 +
  189 + pre_defined = 0;
  190 + memset(reserved, 0, 12);
  191 +}
  192 +
  193 +SrsMp4HandlerReferenceBox::~SrsMp4HandlerReferenceBox()
  194 +{
  195 +}
  196 +
  197 +SrsMp4MediaInformationBox::SrsMp4MediaInformationBox()
  198 +{
  199 + type = 0x6d696e66; // 'minf'
  200 +}
  201 +
  202 +SrsMp4MediaInformationBox::~SrsMp4MediaInformationBox()
  203 +{
  204 +}
  205 +
  206 +SrsMp4VideoMeidaHeaderBox::SrsMp4VideoMeidaHeaderBox()
  207 +{
  208 + type = 0x766d6864; // 'vmhd'
  209 + version = 0;
  210 + flags = 1;
  211 +
  212 + graphicsmode = 0;
  213 + memset(opcolor, 0, 6);
  214 +}
  215 +
  216 +SrsMp4VideoMeidaHeaderBox::~SrsMp4VideoMeidaHeaderBox()
  217 +{
  218 +}
  219 +
  220 +SrsMp4SoundMeidaHeaderBox::SrsMp4SoundMeidaHeaderBox()
  221 +{
  222 + type = 0x736d6864; // 'smhd'
  223 +
  224 + reserved = balance = 0;
  225 +}
  226 +
  227 +SrsMp4SoundMeidaHeaderBox::~SrsMp4SoundMeidaHeaderBox()
  228 +{
  229 +}
  230 +
  231 +SrsMp4DataInformationBox::SrsMp4DataInformationBox()
  232 +{
  233 + type = 0x64696e66; // 'dinf'
  234 +}
  235 +
  236 +SrsMp4DataInformationBox::~SrsMp4DataInformationBox()
  237 +{
  238 +}
  239 +
  240 +SrsMp4DataEntryBox::SrsMp4DataEntryBox()
  241 +{
  242 +}
  243 +
  244 +SrsMp4DataEntryUrlBox::SrsMp4DataEntryUrlBox()
  245 +{
  246 + type = 0x75726c20; // 'url '
  247 +}
  248 +
  249 +SrsMp4DataEntryUrnBox::SrsMp4DataEntryUrnBox()
  250 +{
  251 + type = 0x75726e20; // 'urn '
  252 +}
  253 +
  254 +SrsMp4DataReferenceBox::SrsMp4DataReferenceBox()
  255 +{
  256 + type = 0x64726566; // 'dref'
  257 +
  258 + entry_count = 0;
  259 + entries = NULL;
  260 +}
  261 +
  262 +SrsMp4DataReferenceBox::~SrsMp4DataReferenceBox()
  263 +{
  264 +}
  265 +
  266 +SrsMp4SampleTableBox::SrsMp4SampleTableBox()
  267 +{
  268 + type = 0x7374626c; // 'stbl'
  269 +}
  270 +
  271 +SrsMp4SampleTableBox::~SrsMp4SampleTableBox()
  272 +{
  273 +}
  274 +
  275 +SrsMp4SampleEntry::SrsMp4SampleEntry()
  276 +{
  277 + memset(reserved, 0, 6);
  278 +}
  279 +
  280 +SrsMp4SampleEntry::~SrsMp4SampleEntry()
  281 +{
  282 +}
  283 +
  284 +SrsMp4VisualSampleEntry::SrsMp4VisualSampleEntry()
  285 +{
  286 + pre_defined0 = 0;
  287 + reserved0 = 0;
  288 + reserved1 = 0;
  289 + memset(pre_defined1, 0, 12);
  290 + memset(compressorname, 0, 32);
  291 + frame_count = 1;
  292 + horizresolution = 0x00480000; // 72 dpi
  293 + vertresolution = 0x00480000; // 72 dpi
  294 + depth = 0x0018;
  295 + pre_defined2 = -1;
  296 +}
  297 +
  298 +SrsMp4VisualSampleEntry::~SrsMp4VisualSampleEntry()
  299 +{
  300 +}
  301 +
  302 +SrsMp4AudioSampleEntry::SrsMp4AudioSampleEntry()
  303 +{
  304 + memset(reserved0, 0, 8);
  305 + pre_defined0 = 0;
  306 + reserved1 = 0;
  307 + channelcount = 2;
  308 + samplesize = 16;
  309 +}
  310 +
  311 +SrsMp4AudioSampleEntry::~SrsMp4AudioSampleEntry()
  312 +{
  313 +}
  314 +
  315 +SrsMp4SampleDescriptionBox::SrsMp4SampleDescriptionBox()
  316 +{
  317 + type = 0x73747364; // 'stsd'
  318 +
  319 + entry_count = 0;
  320 + entries = NULL;
  321 +}
  322 +
  323 +SrsMp4SampleDescriptionBox::~SrsMp4SampleDescriptionBox()
  324 +{
  325 + srs_freepa(entries);
  326 +}
  327 +
  328 +SrsMp4SttsEntry::SrsMp4SttsEntry()
  329 +{
  330 + sample_count = 0;
  331 + sample_delta = 0;
  332 +}
  333 +
  334 +SrsMp4DecodingTime2SampleBox::SrsMp4DecodingTime2SampleBox()
  335 +{
  336 + type = 0x73747473; // 'stts'
  337 +
  338 + entry_count = 0;
  339 + entries = NULL;
  340 +}
  341 +
  342 +SrsMp4DecodingTime2SampleBox::~SrsMp4DecodingTime2SampleBox()
  343 +{
  344 + srs_freepa(entries);
  345 +}
  346 +
  347 +SrsMp4CttsEntry::SrsMp4CttsEntry()
  348 +{
  349 + sample_count = 0;
  350 + sample_offset = 0;
  351 +}
  352 +
  353 +SrsMp4CompositionTime2SampleBox::SrsMp4CompositionTime2SampleBox()
  354 +{
  355 + type = 0x63747473; // 'ctts'
  356 +
  357 + entry_count = 0;
  358 + entries = NULL;
  359 +}
  360 +
  361 +SrsMp4CompositionTime2SampleBox::~SrsMp4CompositionTime2SampleBox()
  362 +{
  363 + srs_freepa(entries);
  364 +}
  365 +
@@ -29,6 +29,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -29,6 +29,8 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 */ 29 */
30 #include <srs_core.hpp> 30 #include <srs_core.hpp>
31 31
  32 +#include <string>
  33 +
32 /** 34 /**
33 * 4.2 Object Structure 35 * 4.2 Object Structure
34 * ISO_IEC_14496-12-base-format-2012.pdf, page 16 36 * ISO_IEC_14496-12-base-format-2012.pdf, page 16
@@ -63,8 +65,12 @@ public: @@ -63,8 +65,12 @@ public:
63 }; 65 };
64 66
65 /** 67 /**
66 - * 4.3 File Type Box 68 + * 4.3 File Type Box (ftyp)
67 * ISO_IEC_14496-12-base-format-2012.pdf, page 17 69 * ISO_IEC_14496-12-base-format-2012.pdf, page 17
  70 + * Files written to this version of this specification must contain a file-type box. For compatibility with an earlier
  71 + * version of this specification, files may be conformant to this specification and not contain a file-type box. Files
  72 + * with no file-type box should be read as if they contained an FTYP box with Major_brand='mp41', minor_version=0, and
  73 + * the single compatible brand 'mp41'.
68 */ 74 */
69 class SrsMp4FileTypeBox : public SrsMp4Box 75 class SrsMp4FileTypeBox : public SrsMp4Box
70 { 76 {
@@ -83,8 +89,39 @@ public: @@ -83,8 +89,39 @@ public:
83 }; 89 };
84 90
85 /** 91 /**
86 - * 8.2.1 Movie Box  
87 - * ISO_IEC_14496-12-base-format-2012.pdf, page 31 92 + * 8.1.1 Media Data Box (mdat)
  93 + * ISO_IEC_14496-12-base-format-2012.pdf, page 29
  94 + * This box contains the media data. In video tracks, this box would contain video frames.
  95 + * A presentation may contain zero or more Media Data Boxes. The actual media data follows the type field;
  96 + * its structure is described by the metadata (see particularly the sample table, subclause 8.5, and the
  97 + * item location box, subclause 8.11.3).
  98 + */
  99 +class SrsMp4MediaDataBox : public SrsMp4Box
  100 +{
  101 +private:
  102 + int nb_data;
  103 + uint8_t* data;
  104 +public:
  105 + SrsMp4MediaDataBox();
  106 + virtual ~SrsMp4MediaDataBox();
  107 +};
  108 +
  109 +/**
  110 + * 8.1.2 Free Space Box (free or skip)
  111 + * ISO_IEC_14496-12-base-format-2012.pdf, page 29
  112 + */
  113 +class SrsMp4FreeSpaceBox : public SrsMp4Box
  114 +{
  115 +public:
  116 + SrsMp4FreeSpaceBox();
  117 + virtual ~SrsMp4FreeSpaceBox();
  118 +};
  119 +
  120 +/**
  121 + * 8.2.1 Movie Box (moov)
  122 + * ISO_IEC_14496-12-base-format-2012.pdf, page 30
  123 + * The metadata for a presentation is stored in the single Movie Box which occurs at the top-level of a file.
  124 + * Normally this box is close to the beginning or end of the file, though this is not required.
88 */ 125 */
89 class SrsMp4MovieBox : public SrsMp4Box 126 class SrsMp4MovieBox : public SrsMp4Box
90 { 127 {
@@ -94,15 +131,531 @@ public: @@ -94,15 +131,531 @@ public:
94 }; 131 };
95 132
96 /** 133 /**
97 - * 8.2.2 Movie Header Box 134 + * 8.2.2 Movie Header Box (mvhd)
98 * ISO_IEC_14496-12-base-format-2012.pdf, page 31 135 * ISO_IEC_14496-12-base-format-2012.pdf, page 31
99 */ 136 */
100 -class SrsMp4MovieHeaderBox : public SrsMp4Box 137 +class SrsMp4MovieHeaderBox : public SrsMp4FullBox
101 { 138 {
102 public: 139 public:
  140 + // an integer that declares the creation time of the presentation (in seconds since
  141 + // midnight, Jan. 1, 1904, in UTC time)
  142 + uint64_t creation_time;
  143 + // an integer that declares the most recent time the presentation was modified (in
  144 + // seconds since midnight, Jan. 1, 1904, in UTC time)
  145 + uint64_t modification_time;
  146 + // an integer that specifies the time-scale for the entire presentation; this is the number of
  147 + // time units that pass in one second. For example, a time coordinate system that measures time in
  148 + // sixtieths of a second has a time scale of 60.
  149 + uint32_t timescale;
  150 + // an integer that declares length of the presentation (in the indicated timescale). This property
  151 + // is derived from the presentation’s tracks: the value of this field corresponds to the duration of the
  152 + // longest track in the presentation. If the duration cannot be determined then duration is set to all 1s.
  153 + uint64_t duration;
  154 +public:
  155 + // a fixed point 16.16 number that indicates the preferred rate to play the presentation; 1.0
  156 + // (0x00010000) is normal forward playback
  157 + uint32_t rate;
  158 + // a fixed point 8.8 number that indicates the preferred playback volume. 1.0 (0x0100) is full volume.
  159 + uint16_t volume;
  160 + uint16_t reserved0;
  161 + uint64_t reserved1;
  162 + // a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex values (0,0,0x40000000).
  163 + int32_t matrix[9];
  164 + uint32_t pre_defined[6];
  165 + // a non-zero integer that indicates a value to use for the track ID of the next track to be
  166 + // added to this presentation. Zero is not a valid track ID value. The value of next_track_ID shall be
  167 + // larger than the largest track-ID in use. If this value is equal to all 1s (32-bit maxint), and a new media
  168 + // track is to be added, then a search must be made in the file for an unused track identifier.
  169 + uint32_t next_track_ID;
  170 +public:
103 SrsMp4MovieHeaderBox(); 171 SrsMp4MovieHeaderBox();
104 virtual ~SrsMp4MovieHeaderBox(); 172 virtual ~SrsMp4MovieHeaderBox();
105 }; 173 };
106 174
  175 +/**
  176 + * 8.3.1 Track Box (trak)
  177 + * ISO_IEC_14496-12-base-format-2012.pdf, page 32
  178 + * This is a container box for a single track of a presentation. A presentation consists of one or more tracks.
  179 + * Each track is independent of the other tracks in the presentation and carries its own temporal and spatial
  180 + * information. Each track will contain its associated Media Box.
  181 + */
  182 +class SrsMp4TrackBox : public SrsMp4Box
  183 +{
  184 +public:
  185 + SrsMp4TrackBox();
  186 + virtual ~SrsMp4TrackBox();
  187 +};
  188 +
  189 +/**
  190 + * 8.3.2 Track Header Box (tkhd)
  191 + * ISO_IEC_14496-12-base-format-2012.pdf, page 32
  192 + */
  193 +class SrsMp4TrackHeaderBox : public SrsMp4FullBox
  194 +{
  195 +public:
  196 + // an integer that declares the creation time of the presentation (in seconds since
  197 + // midnight, Jan. 1, 1904, in UTC time)
  198 + uint64_t creation_time;
  199 + // an integer that declares the most recent time the presentation was modified (in
  200 + // seconds since midnight, Jan. 1, 1904, in UTC time)
  201 + uint64_t modification_time;
  202 + // an integer that specifies the time-scale for the entire presentation; this is the number of
  203 + // time units that pass in one second. For example, a time coordinate system that measures time in
  204 + // sixtieths of a second has a time scale of 60.
  205 + uint32_t timescale;
  206 + // an integer that uniquely identifies this track over the entire life-time of this presentation.
  207 + // Track IDs are never re-used and cannot be zero.
  208 + uint32_t track_ID;
  209 + uint32_t reserved0;
  210 + // an integer that indicates the duration of this track (in the timescale indicated in the Movie
  211 + // Header Box). The value of this field is equal to the sum of the durations of all of the track’s edits. If
  212 + // there is no edit list, then the duration is the sum of the sample durations, converted into the timescale
  213 + // in the Movie Header Box. If the duration of this track cannot be determined then duration is set to all
  214 + // 1s.
  215 + uint64_t duration;
  216 +public:
  217 + uint64_t reserved1;
  218 + // specifies the front-to-back ordering of video tracks; tracks with lower numbers are closer to the
  219 + // viewer. 0 is the normal value, and -1 would be in front of track 0, and so on.
  220 + int16_t layer;
  221 + // an integer that specifies a group or collection of tracks. If this field is 0 there is no
  222 + // information on possible relations to other tracks. If this field is not 0, it should be the same for tracks
  223 + // that contain alternate data for one another and different for tracks belonging to different such groups.
  224 + // Only one track within an alternate group should be played or streamed at any one time, and must be
  225 + // distinguishable from other tracks in the group via attributes such as bitrate, codec, language, packet
  226 + // size etc. A group may have only one member.
  227 + int16_t alternate_group;
  228 + // a fixed 8.8 value specifying the track's relative audio volume. Full volume is 1.0 (0x0100) and
  229 + // is the normal value. Its value is irrelevant for a purely visual track. Tracks may be composed by
  230 + // combining them according to their volume, and then using the overall Movie Header Box volume
  231 + // setting; or more complex audio composition (e.g. MPEG-4 BIFS) may be used.
  232 + int16_t volume;
  233 + uint16_t reserved2;
  234 + // a transformation matrix for the video; (u,v,w) are restricted here to (0,0,1), hex (0,0,0x40000000).
  235 + int32_t matrix[9];
  236 + // the track's visual presentation size as fixed-point 16.16 values. These need
  237 + // not be the same as the pixel dimensions of the images, which is documented in the sample
  238 + // description(s); all images in the sequence are scaled to this size, before any overall transformation of
  239 + // the track represented by the matrix. The pixel dimensions of the images are the default values.
  240 + int32_t width;
  241 + int32_t height;
  242 +public:
  243 + SrsMp4TrackHeaderBox();
  244 + virtual ~SrsMp4TrackHeaderBox();
  245 +};
  246 +
  247 +/**
  248 + * 8.6.5 Edit Box (edts)
  249 + * ISO_IEC_14496-12-base-format-2012.pdf, page 54
  250 + * An Edit Box maps the presentation time-line to the media time-line as it is stored in the file.
  251 + * The Edit Box is a container for the edit lists.
  252 + */
  253 +class SrsMp4EditBox : public SrsMp4Box
  254 +{
  255 +public:
  256 + SrsMp4EditBox();
  257 + virtual ~SrsMp4EditBox();
  258 +};
  259 +
  260 +/**
  261 + * 8.6.6 Edit List Box
  262 + * ISO_IEC_14496-12-base-format-2012.pdf, page 55
  263 + */
  264 +struct SrsMp4ElstEntry
  265 +{
  266 +public:
  267 + // an integer that specifies the duration of this edit segment in units of the timescale
  268 + // in the Movie Header Box
  269 + uint64_t segment_duration;
  270 + // an integer containing the starting time within the media of this edit segment (in media time
  271 + // scale units, in composition time). If this field is set to –1, it is an empty edit. The last edit in a track
  272 + // shall never be an empty edit. Any difference between the duration in the Movie Header Box, and the
  273 + // track’s duration is expressed as an implicit empty edit at the end.
  274 + int64_t media_time;
  275 +public:
  276 + // specifies the relative rate at which to play the media corresponding to this edit segment. If this value is 0,
  277 + // then the edit is specifying a ‘dwell’: the media at media-time is presented for the segment-duration. Otherwise
  278 + // this field shall contain the value 1.
  279 + int16_t media_rate_integer;
  280 + int16_t media_rate_fraction;
  281 +public:
  282 + SrsMp4ElstEntry();
  283 +};
  284 +
  285 +/**
  286 + * 8.6.6 Edit List Box (elst)
  287 + * ISO_IEC_14496-12-base-format-2012.pdf, page 54
  288 + * This box contains an explicit timeline map. Each entry defines part of the track time-line: by mapping part of
  289 + * the media time-line, or by indicating ‘empty’ time, or by defining a ‘dwell’, where a single time-point in the
  290 + * media is held for a period.
  291 + */
  292 +class SrsMp4EditListBox : public SrsMp4FullBox
  293 +{
  294 +public:
  295 + // an integer that gives the number of entries in the following table
  296 + uint32_t entry_count;
  297 + SrsMp4ElstEntry* entries;
  298 +public:
  299 + SrsMp4EditListBox();
  300 + virtual ~SrsMp4EditListBox();
  301 +};
  302 +
  303 +/**
  304 + * 8.4.1 Media Box (mdia)
  305 + * ISO_IEC_14496-12-base-format-2012.pdf, page 36
  306 + * The media declaration container contains all the objects that declare information about the media data within a
  307 + * track.
  308 + */
  309 +class SrsMp4MediaBox : public SrsMp4Box
  310 +{
  311 +public:
  312 + SrsMp4MediaBox();
  313 + virtual ~SrsMp4MediaBox();
  314 +};
  315 +
  316 +/**
  317 + * 8.4.2 Media Header Box (mdhd)
  318 + * ISO_IEC_14496-12-base-format-2012.pdf, page 36
  319 + * The media header declares overall information that is media-independent, and relevant to characteristics of
  320 + * the media in a track.
  321 + */
  322 +class SrsMp4MediaHeaderBox : public SrsMp4FullBox
  323 +{
  324 +public:
  325 + // an integer that declares the creation time of the presentation (in seconds since
  326 + // midnight, Jan. 1, 1904, in UTC time)
  327 + uint64_t creation_time;
  328 + // an integer that declares the most recent time the presentation was modified (in
  329 + // seconds since midnight, Jan. 1, 1904, in UTC time)
  330 + uint64_t modification_time;
  331 + // an integer that specifies the time-scale for the entire presentation; this is the number of
  332 + // time units that pass in one second. For example, a time coordinate system that measures time in
  333 + // sixtieths of a second has a time scale of 60.
  334 + uint32_t timescale;
  335 + // an integer that declares length of the presentation (in the indicated timescale). This property
  336 + // is derived from the presentation’s tracks: the value of this field corresponds to the duration of the
  337 + // longest track in the presentation. If the duration cannot be determined then duration is set to all 1s.
  338 + uint64_t duration;
  339 +public:
  340 + uint8_t pad:1;
  341 + // the language code for this media. See ISO 639-2/T for the set of three character
  342 + // codes. Each character is packed as the difference between its ASCII value and 0x60. Since the code
  343 + // is confined to being three lower-case letters, these values are strictly positive.
  344 + uint16_t language:15;
  345 + uint16_t pre_defined;
  346 +public:
  347 + SrsMp4MediaHeaderBox();
  348 + virtual ~SrsMp4MediaHeaderBox();
  349 +};
  350 +
  351 +/**
  352 + * 8.4.3 Handler Reference Box (hdlr)
  353 + * ISO_IEC_14496-12-base-format-2012.pdf, page 37
  354 + * This box within a Media Box declares the process by which the media-data in the track is presented, and thus,
  355 + * the nature of the media in a track. For example, a video track would be handled by a video handler.
  356 + */
  357 +class SrsMp4HandlerReferenceBox : public SrsMp4FullBox
  358 +{
  359 +public:
  360 + uint32_t pre_defined;
  361 + // an integer containing one of the following values, or a value from a derived specification:
  362 + // ‘vide’, Video track
  363 + // ‘soun’, Audio track
  364 + uint32_t handler_type;
  365 + uint32_t reserved[3];
  366 + // a null-terminated string in UTF-8 characters which gives a human-readable name for the track
  367 + // type (for debugging and inspection purposes).
  368 + std::string name;
  369 +public:
  370 + SrsMp4HandlerReferenceBox();
  371 + virtual ~SrsMp4HandlerReferenceBox();
  372 +};
  373 +
  374 +/**
  375 + * 8.4.4 Media Information Box (minf)
  376 + * ISO_IEC_14496-12-base-format-2012.pdf, page 38
  377 + * This box contains all the objects that declare characteristic information of the media in the track.
  378 + */
  379 +class SrsMp4MediaInformationBox : public SrsMp4Box
  380 +{
  381 +public:
  382 + SrsMp4MediaInformationBox();
  383 + virtual ~SrsMp4MediaInformationBox();
  384 +};
  385 +
  386 +/**
  387 + * 8.4.5.2 Video Media Header Box (vmhd)
  388 + * ISO_IEC_14496-12-base-format-2012.pdf, page 38
  389 + * The video media header contains general presentation information, independent of the coding, for video
  390 + * media. Note that the flags field has the value 1.
  391 + */
  392 +class SrsMp4VideoMeidaHeaderBox : public SrsMp4FullBox
  393 +{
  394 +public:
  395 + // a composition mode for this video track, from the following enumerated set,
  396 + // which may be extended by derived specifications:
  397 + // copy = 0 copy over the existing image
  398 + uint16_t graphicsmode;
  399 + // a set of 3 colour values (red, green, blue) available for use by graphics modes
  400 + uint16_t opcolor[3];
  401 +public:
  402 + SrsMp4VideoMeidaHeaderBox();
  403 + virtual ~SrsMp4VideoMeidaHeaderBox();
  404 +};
  405 +
  406 +/**
  407 + * 8.4.5.3 Sound Media Header Box (smhd)
  408 + * ISO_IEC_14496-12-base-format-2012.pdf, page 39
  409 + * The sound media header contains general presentation information, independent of the coding, for audio
  410 + * media. This header is used for all tracks containing audio.
  411 + */
  412 +class SrsMp4SoundMeidaHeaderBox : public SrsMp4FullBox
  413 +{
  414 +public:
  415 + // a fixed-point 8.8 number that places mono audio tracks in a stereo space; 0 is centre (the
  416 + // normal value); full left is -1.0 and full right is 1.0.
  417 + int16_t balance;
  418 + uint16_t reserved;
  419 +public:
  420 + SrsMp4SoundMeidaHeaderBox();
  421 + virtual ~SrsMp4SoundMeidaHeaderBox();
  422 +};
  423 +
  424 +/**
  425 + * 8.7.1 Data Information Box (dinf)
  426 + * ISO_IEC_14496-12-base-format-2012.pdf, page 56
  427 + * The data information box contains objects that declare the location of the media information in a track.
  428 + */
  429 +class SrsMp4DataInformationBox : public SrsMp4Box
  430 +{
  431 +public:
  432 + SrsMp4DataInformationBox();
  433 + virtual ~SrsMp4DataInformationBox();
  434 +};
  435 +
  436 +/**
  437 + * 8.7.2 Data Reference Box
  438 + * ISO_IEC_14496-12-base-format-2012.pdf, page 56
  439 + */
  440 +class SrsMp4DataEntryBox : public SrsMp4FullBox
  441 +{
  442 +public:
  443 + std::string location;
  444 +public:
  445 + SrsMp4DataEntryBox();
  446 +};
  447 +
  448 +/**
  449 + * 8.7.2 Data Reference Box (url )
  450 + * ISO_IEC_14496-12-base-format-2012.pdf, page 56
  451 + */
  452 +class SrsMp4DataEntryUrlBox : public SrsMp4DataEntryBox
  453 +{
  454 +public:
  455 + SrsMp4DataEntryUrlBox();
  456 +};
  457 +
  458 +/**
  459 + * 8.7.2 Data Reference Box (urn )
  460 + * ISO_IEC_14496-12-base-format-2012.pdf, page 56
  461 + */
  462 +class SrsMp4DataEntryUrnBox : public SrsMp4DataEntryBox
  463 +{
  464 +public:
  465 + std::string name;
  466 +public:
  467 + SrsMp4DataEntryUrnBox();
  468 +};
  469 +
  470 +/**
  471 + * 8.7.2 Data Reference Box (dref)
  472 + * ISO_IEC_14496-12-base-format-2012.pdf, page 56
  473 + * The data reference object contains a table of data references (normally URLs) that declare the location(s) of
  474 + * the media data used within the presentation. The data reference index in the sample description ties entries
  475 + * in this table to the samples in the track. A track may be split over several sources in this way.
  476 + */
  477 +class SrsMp4DataReferenceBox : public SrsMp4FullBox
  478 +{
  479 +public:
  480 + // an integer that counts the actual entries
  481 + uint32_t entry_count;
  482 + SrsMp4DataEntryBox* entries;
  483 +public:
  484 + SrsMp4DataReferenceBox();
  485 + virtual ~SrsMp4DataReferenceBox();
  486 +};
  487 +
  488 +/**
  489 + * 8.5.1 Sample Table Box (stbl)
  490 + * ISO_IEC_14496-12-base-format-2012.pdf, page 40
  491 + * The sample table contains all the time and data indexing of the media samples in a track. Using the tables
  492 + * here, it is possible to locate samples in time, determine their type (e.g. I-frame or not), and determine their
  493 + * size, container, and offset into that container.
  494 + */
  495 +class SrsMp4SampleTableBox : public SrsMp4Box
  496 +{
  497 +public:
  498 + SrsMp4SampleTableBox();
  499 + virtual ~SrsMp4SampleTableBox();
  500 +};
  501 +
  502 +/**
  503 + * 8.5.2 Sample Description Box
  504 + * ISO_IEC_14496-12-base-format-2012.pdf, page 43
  505 + */
  506 +class SrsMp4SampleEntry : public SrsMp4Box
  507 +{
  508 +public:
  509 + uint8_t reserved[6];
  510 + // an integer that contains the index of the data reference to use to retrieve
  511 + // data associated with samples that use this sample description. Data references are stored in Data
  512 + // Reference Boxes. The index ranges from 1 to the number of data references.
  513 + uint16_t data_reference_index;
  514 +public:
  515 + SrsMp4SampleEntry();
  516 + virtual ~SrsMp4SampleEntry();
  517 +};
  518 +
  519 +/**
  520 + * 8.5.2 Sample Description Box (avc1)
  521 + * ISO_IEC_14496-12-base-format-2012.pdf, page 44
  522 + */
  523 +class SrsMp4VisualSampleEntry : public SrsMp4SampleEntry
  524 +{
  525 +public:
  526 + uint16_t pre_defined0;
  527 + uint16_t reserved0;
  528 + uint32_t pre_defined1[3];
  529 + // the maximum visual width and height of the stream described by this sample
  530 + // description, in pixels
  531 + uint16_t width;
  532 + uint16_t height;
  533 + uint32_t horizresolution;
  534 + uint32_t vertresolution;
  535 + uint32_t reserved1;
  536 + // how many frames of compressed video are stored in each sample. The default is
  537 + // 1, for one frame per sample; it may be more than 1 for multiple frames per sample
  538 + uint16_t frame_count;
  539 + // a name, for informative purposes. It is formatted in a fixed 32-byte field, with the first
  540 + // byte set to the number of bytes to be displayed, followed by that number of bytes of displayable data,
  541 + // and then padding to complete 32 bytes total (including the size byte). The field may be set to 0.
  542 + char compressorname[32];
  543 + // one of the following values
  544 + // 0x0018 – images are in colour with no alpha
  545 + uint16_t depth;
  546 + int16_t pre_defined2;
  547 +public:
  548 + SrsMp4VisualSampleEntry();
  549 + virtual ~SrsMp4VisualSampleEntry();
  550 +};
  551 +
  552 +/**
  553 + * 8.5.2 Sample Description Box (mp4a)
  554 + * ISO_IEC_14496-12-base-format-2012.pdf, page 45
  555 + */
  556 +class SrsMp4AudioSampleEntry : public SrsMp4SampleEntry
  557 +{
  558 +public:
  559 + uint32_t reserved0[2];
  560 + uint16_t channelcount;
  561 + uint16_t samplesize;
  562 + uint16_t pre_defined0;
  563 + uint16_t reserved1;
  564 + uint32_t samplerate;
  565 +public:
  566 + SrsMp4AudioSampleEntry();
  567 + virtual ~SrsMp4AudioSampleEntry();
  568 +};
  569 +
  570 +/**
  571 + * 8.5.2 Sample Description Box (stsd)
  572 + * ISO_IEC_14496-12-base-format-2012.pdf, page 40
  573 + * The sample description table gives detailed information about the coding type used, and any initialization
  574 + * information needed for that coding.
  575 + */
  576 +class SrsMp4SampleDescriptionBox : public SrsMp4FullBox
  577 +{
  578 +public:
  579 + // an integer that gives the number of entries in the following table
  580 + uint32_t entry_count;
  581 + SrsMp4SampleEntry* entries;
  582 +public:
  583 + SrsMp4SampleDescriptionBox();
  584 + virtual ~SrsMp4SampleDescriptionBox();
  585 +};
  586 +
  587 +/**
  588 + * 8.6.1.2 Decoding Time to Sample Box (stts)
  589 + * ISO_IEC_14496-12-base-format-2012.pdf, page 48
  590 + */
  591 +struct SrsMp4SttsEntry
  592 +{
  593 + // an integer that counts the number of consecutive samples that have the given
  594 + // duration.
  595 + uint32_t sample_count;
  596 + // an integer that gives the delta of these samples in the time-scale of the media.
  597 + uint32_t sample_delta;
  598 + // Constructor
  599 + SrsMp4SttsEntry();
  600 +};
  601 +
  602 +/**
  603 + * 8.6.1.2 Decoding Time to Sample Box (stts)
  604 + * ISO_IEC_14496-12-base-format-2012.pdf, page 48
  605 + * This box contains a compact version of a table that allows indexing from decoding time to sample number.
  606 + * Other tables give sample sizes and pointers, from the sample number. Each entry in the table gives the
  607 + * number of consecutive samples with the same time delta, and the delta of those samples. By adding the
  608 + * deltas a complete time-to-sample map may be built.
  609 + */
  610 +class SrsMp4DecodingTime2SampleBox : public SrsMp4FullBox
  611 +{
  612 +public:
  613 + // an integer that gives the number of entries in the following table.
  614 + uint32_t entry_count;
  615 + SrsMp4SttsEntry* entries;
  616 +public:
  617 + SrsMp4DecodingTime2SampleBox();
  618 + virtual ~SrsMp4DecodingTime2SampleBox();
  619 +};
  620 +
  621 +
  622 +/**
  623 + * 8.6.1.3 Composition Time to Sample Box (ctts)
  624 + * ISO_IEC_14496-12-base-format-2012.pdf, page 49
  625 + */
  626 +struct SrsMp4CttsEntry
  627 +{
  628 + // an integer that counts the number of consecutive samples that have the given offset.
  629 + uint32_t sample_count;
  630 + // uint32_t for version=0
  631 + // int32_t for version=1
  632 + // an integer that gives the offset between CT and DT, such that CT(n) = DT(n) +
  633 + // CTTS(n).
  634 + int64_t sample_offset;
  635 + // Constructor
  636 + SrsMp4CttsEntry();
  637 +};
  638 +
  639 + /**
  640 + * 8.6.1.3 Composition Time to Sample Box (ctts)
  641 + * ISO_IEC_14496-12-base-format-2012.pdf, page 49
  642 + * This box provides the offset between decoding time and composition time. In version 0 of this box the
  643 + * decoding time must be less than the composition time, and the offsets are expressed as unsigned numbers
  644 + * such that CT(n) = DT(n) + CTTS(n) where CTTS(n) is the (uncompressed) table entry for sample n. In version
  645 + * 1 of this box, the composition timeline and the decoding timeline are still derived from each other, but the
  646 + * offsets are signed. It is recommended that for the computed composition timestamps, there is exactly one with
  647 + * the value 0 (zero).
  648 + */
  649 +class SrsMp4CompositionTime2SampleBox : public SrsMp4FullBox
  650 +{
  651 +public:
  652 + // an integer that gives the number of entries in the following table.
  653 + uint32_t entry_count;
  654 + SrsMp4CttsEntry* entries;
  655 +public:
  656 + SrsMp4CompositionTime2SampleBox();
  657 + virtual ~SrsMp4CompositionTime2SampleBox();
  658 +};
  659 +
107 #endif 660 #endif
108 661