winlin

demux aac from ts and write to file.aac

@@ -558,6 +558,9 @@ public: @@ -558,6 +558,9 @@ public:
558 // 2.4.3.7 Semantic definition of fields in PES packet. page 49. 558 // 2.4.3.7 Semantic definition of fields in PES packet. page 49.
559 int32_t packet_start_code_prefix; 559 int32_t packet_start_code_prefix;
560 560
  561 + int64_t pts; // 33bits
  562 + int64_t dts; // 33bits
  563 +
561 // header size. 564 // header size.
562 int packet_header_size; 565 int packet_header_size;
563 566
@@ -681,6 +684,7 @@ TSMessage::TSMessage() @@ -681,6 +684,7 @@ TSMessage::TSMessage()
681 stream_type = TSStreamTypeReserved; 684 stream_type = TSStreamTypeReserved;
682 stream_id = 0; 685 stream_id = 0;
683 packet_start_code_prefix = 0; 686 packet_start_code_prefix = 0;
  687 + pts = dts = 0;
684 PES_packet_length = 0; 688 PES_packet_length = 0;
685 packet_header_size = 0; 689 packet_header_size = 0;
686 parsed_packet_size = 0; 690 parsed_packet_size = 0;
@@ -1439,6 +1443,8 @@ int TSPayloadPES::demux(TSContext* ctx, TSPacket* pkt, u_int8_t* start, u_int8_t @@ -1439,6 +1443,8 @@ int TSPayloadPES::demux(TSContext* ctx, TSPacket* pkt, u_int8_t* start, u_int8_t
1439 msg->continuity_counter = pid->continuity_counter; 1443 msg->continuity_counter = pid->continuity_counter;
1440 msg->stream_id = stream_id; 1444 msg->stream_id = stream_id;
1441 msg->packet_start_code_prefix = packet_start_code_prefix; 1445 msg->packet_start_code_prefix = packet_start_code_prefix;
  1446 + msg->dts = dts;
  1447 + msg->pts = pts;
1442 1448
1443 // PES_packet_data_byte, page58. 1449 // PES_packet_data_byte, page58.
1444 // the packet size contains the header size. 1450 // the packet size contains the header size.
@@ -1789,7 +1795,31 @@ public: @@ -1789,7 +1795,31 @@ public:
1789 }; 1795 };
1790 1796
1791 /** 1797 /**
1792 -* 6.2 Audio Data Transport Stream, ADTS 1798 +* Table 35 – Sampling frequency dependent on
  1799 +* sampling_frequency_index. in page 46.
  1800 +*/
  1801 +enum TSAacSampleFrequency
  1802 +{
  1803 + TSAacSampleFrequency96000 = 0x00,
  1804 + TSAacSampleFrequency88200 = 0x01,
  1805 + TSAacSampleFrequency64000 = 0x02,
  1806 + TSAacSampleFrequency48000 = 0x03,
  1807 + TSAacSampleFrequency44100 = 0x04,
  1808 + TSAacSampleFrequency32000 = 0x05,
  1809 + TSAacSampleFrequency24000 = 0x06,
  1810 + TSAacSampleFrequency22050 = 0x07,
  1811 + TSAacSampleFrequency16000 = 0x08,
  1812 + TSAacSampleFrequency12000 = 0x09,
  1813 + TSAacSampleFrequency11025 = 0x0a,
  1814 + TSAacSampleFrequency8000 = 0x0b,
  1815 + TSAacSampleFrequencyReserved0 = 0x0c,
  1816 + TSAacSampleFrequencyReserved1 = 0x0d,
  1817 + TSAacSampleFrequencyReserved2 = 0x0e,
  1818 + TSAacSampleFrequencyReserved3 = 0x0f,
  1819 +};
  1820 +
  1821 +/**
  1822 +* 6.2 Audio Data Transport Stream, ADTS, in page 26.
1793 */ 1823 */
1794 class TSAacAdts 1824 class TSAacAdts
1795 { 1825 {
@@ -1802,7 +1832,7 @@ public: @@ -1802,7 +1832,7 @@ public:
1802 int8_t protection_absent; //1bit 1832 int8_t protection_absent; //1bit
1803 // 12bits 1833 // 12bits
1804 int8_t profile; //2bit 1834 int8_t profile; //2bit
1805 - int8_t sampling_frequency_index; //4bits 1835 + TSAacSampleFrequency sampling_frequency_index; //4bits
1806 int8_t private_bit; //1bit 1836 int8_t private_bit; //1bit
1807 int8_t channel_configuration; //3bits 1837 int8_t channel_configuration; //3bits
1808 int8_t original_or_copy; //1bit 1838 int8_t original_or_copy; //1bit
@@ -1826,7 +1856,7 @@ public: @@ -1826,7 +1856,7 @@ public:
1826 layer = 0; 1856 layer = 0;
1827 protection_absent = 0; 1857 protection_absent = 0;
1828 profile = 0; 1858 profile = 0;
1829 - sampling_frequency_index = 0; 1859 + sampling_frequency_index = TSAacSampleFrequencyReserved0;
1830 private_bit = 0; 1860 private_bit = 0;
1831 channel_configuration = 0; 1861 channel_configuration = 0;
1832 original_or_copy = 0; 1862 original_or_copy = 0;
@@ -1904,7 +1934,7 @@ public: @@ -1904,7 +1934,7 @@ public:
1904 private_bit = temp & 0x01; 1934 private_bit = temp & 0x01;
1905 temp = temp >> 1; 1935 temp = temp >> 1;
1906 1936
1907 - sampling_frequency_index = temp & 0x0F; 1937 + sampling_frequency_index = (TSAacSampleFrequency)(temp & 0x0F);
1908 temp = temp >> 4; 1938 temp = temp >> 4;
1909 1939
1910 profile = temp & 0x03; 1940 profile = temp & 0x03;
@@ -1928,7 +1958,199 @@ public: @@ -1928,7 +1958,199 @@ public:
1928 } 1958 }
1929 }; 1959 };
1930 1960
/**
* minimal FLV file writer for an audio-only AAC stream.
* open() creates the file and writes the 9-byte FLV header followed by
* PreviousTagSize0; write_audio() appends one audio tag per call.
* the object owns fd and closes it in the destructor, so it must not be
* copied (a copy would close the same descriptor twice).
*/
class FlvMuxer
{
public:
    // output file descriptor, -1 while no file is open.
    int fd;
    // path given to open(); only the pointer is kept, the string is not copied.
    const char* file;
    // true once a sequence header tag (size == 0) has been written.
    bool audio_sequence_header_writen;

    FlvMuxer()
    {
        file = NULL;
        // 0 is a valid descriptor, so use -1 as the "not open" sentinel.
        fd = -1;
        audio_sequence_header_writen = false;
    }

    virtual ~FlvMuxer()
    {
        if (fd >= 0) {
            ::close(fd);
        }
    }

    /**
    * create/truncate the output file and write the FLV file header.
    * calling open() again closes the previously opened file first.
    * @return 0 on success, -1 when the file cannot be opened or written.
    */
    int open(const char* _file)
    {
        // do not leak the descriptor of a previously opened file.
        if (fd >= 0) {
            ::close(fd);
            fd = -1;
        }
        // a new file needs its own sequence header.
        audio_sequence_header_writen = false;

        file = _file;
        if (file == NULL) {
            return -1;
        }

        if ((fd = ::open(file, O_CREAT|O_WRONLY|O_TRUNC,
            S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH)) < 0
        ) {
            return -1;
        }

        static const unsigned char header[] = {
            0x46, 0x4c, 0x56, // FLV
            0x01, // version: 01
            0x04, // 0x05:audio+video, 0x01:video, 0x04:audio
            0x00, 0x00, 0x00, 0x09, // offset: always 0x09
            0x00, 0x00, 0x00, 0x00 // previous tag 0: always 0
        };
        return write_all(header, sizeof(header));
    }

    /**
    * append one FLV audio tag: 11 bytes tag header, 2 bytes AAC codec
    * header, the payload, then the 4 bytes PreviousTagSize.
    * the first call with size > 0 writes a sequence header tag first.
    * @param data, payload bytes; may be NULL only when size is 0.
    * @param size, if 0 for sequence header.
    * @param timestamp, written as given: low 24 bits to Timestamp, high
    *       8 bits to TimestampExtended. FLV defines this field in
    *       milliseconds, so the caller must pass milliseconds.
    * @param sound_rate, Sampling rate. The following values are defined:
    *       0 = 5.5 kHz
    *       1 = 11 kHz
    *       2 = 22 kHz
    *       3 = 44 kHz
    * @param sound_type, Mono or stereo sound
    *       0 = Mono sound
    *       1 = Stereo sound
    * @return 0 on success, -1 on invalid arguments or write failure.
    */
    int write_audio(char* data, int size, u_int32_t timestamp, int sound_rate, int sound_type)
    {
        const int tag_header_size = 11;
        const int codec_header_size = 2; // aac only

        if (size < 0 || (size > 0 && data == NULL)) {
            return -1;
        }
        // DataSize is a 24 bits field.
        if (size > 0x00ffffff - codec_header_size) {
            return -1;
        }

        if (size > 0 && !audio_sequence_header_writen) {
            if (write_audio(NULL, 0, 0, sound_rate, sound_type) != 0) {
                return -1;
            }
        }

        u_int32_t data_size = (u_int32_t)(size + codec_header_size);
        u_int32_t tag_size = data_size + tag_header_size;

        ////////////////////////////////////
        // 11bytes tag header.
        ////////////////////////////////////
        // all multi-byte fields are big-endian; shifts keep the output
        // independent of the host byte order.
        unsigned char tag_header[11];
        unsigned char* p = tag_header;
        // TagType
        *p++ = 0x08; // audio
        // DataSize
        *p++ = (unsigned char)((data_size >> 16) & 0xff);
        *p++ = (unsigned char)((data_size >> 8) & 0xff);
        *p++ = (unsigned char)(data_size & 0xff);
        // Timestamp
        *p++ = (unsigned char)((timestamp >> 16) & 0xff);
        *p++ = (unsigned char)((timestamp >> 8) & 0xff);
        *p++ = (unsigned char)(timestamp & 0xff);
        // TimestampExtended
        *p++ = (unsigned char)((timestamp >> 24) & 0xff);
        // StreamID
        *p++ = 0;
        *p++ = 0;
        *p++ = 0;

        ////////////////////////////////////
        // 2bytes codec header for aac
        ////////////////////////////////////
        unsigned char sequence_header[2];
        // SoundFormat(aac) | SoundRate | SoundSize | SoundType
        sequence_header[0] = (unsigned char)(
            0xa0 // aac
            | ((sound_rate << 2) & 0x0c)
            | 0x02 // Compressed formats always decode to 16 bits internally.
            | (sound_type & 0x01)
        );
        // AACPacketType: 0 for sequence header, 1 for raw data.
        // NOTE(review): the FLV spec describes the sequence header payload
        // as an AudioSpecificConfig; here it is written with no payload.
        // TODO confirm whether the config must be supplied by the caller.
        sequence_header[1] = (size == 0)? 0x00 : 0x01;

        ////////////////////////////////////
        // 4bytes tag size
        ////////////////////////////////////
        unsigned char tag_size_bytes[4];
        tag_size_bytes[0] = (unsigned char)((tag_size >> 24) & 0xff);
        tag_size_bytes[1] = (unsigned char)((tag_size >> 16) & 0xff);
        tag_size_bytes[2] = (unsigned char)((tag_size >> 8) & 0xff);
        tag_size_bytes[3] = (unsigned char)(tag_size & 0xff);

        // write
        if (write_all(tag_header, sizeof(tag_header)) != 0) {
            return -1;
        }
        if (write_all(sequence_header, sizeof(sequence_header)) != 0) {
            return -1;
        }
        if (size > 0 && write_all(data, (size_t)size) != 0) {
            return -1;
        }
        if (write_all(tag_size_bytes, sizeof(tag_size_bytes)) != 0) {
            return -1;
        }

        // remember the sequence header only after it is completely written.
        if (size == 0) {
            audio_sequence_header_writen = true;
        }

        return 0;
    }

    // video is not muxed yet; accepts and ignores the data.
    int write_video(char* /*data*/, int /*size*/)
    {
        return 0;
    }

private:
    // write exactly count bytes to fd, continuing after short writes.
    // returns 0 on success, -1 when write() fails or makes no progress.
    int write_all(const void* buf, size_t count)
    {
        const char* pos = static_cast<const char*>(buf);
        while (count > 0) {
            ssize_t nwrite = ::write(fd, pos, count);
            if (nwrite <= 0) {
                return -1;
            }
            pos += nwrite;
            count -= (size_t)nwrite;
        }
        return 0;
    }
};
  2106 +
  2107 +class AacMuxer
  2108 +{
  2109 +public:
  2110 + int fd;
  2111 + const char* file;
  2112 +
  2113 + AacMuxer()
  2114 + {
  2115 + file = NULL;
  2116 + fd = 0;
  2117 + }
  2118 +
  2119 + virtual ~AacMuxer()
  2120 + {
  2121 + if (fd > 0) {
  2122 + close(fd);
  2123 + }
  2124 + }
  2125 +
  2126 + int open(const char* _file)
  2127 + {
  2128 + file = _file;
  2129 + if ((fd = ::open(file, O_CREAT|O_WRONLY|O_TRUNC,
  2130 + S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH)) < 0
  2131 + ) {
  2132 + return -1;
  2133 + }
  2134 +
  2135 + return 0;
  2136 + }
  2137 +
  2138 + int write_audio(char* data, int size)
  2139 + {
  2140 + if (size > 0 && write(fd, data, size) != size) {
  2141 + return -1;
  2142 + }
  2143 +
  2144 + return 0;
  2145 + }
  2146 +
  2147 + int write_video(char* data, int size)
  2148 + {
  2149 + return 0;
  2150 + }
  2151 +};
  2152 +
  2153 +int consume(TSMessage* msg, FlvMuxer* flv, AacMuxer* aac_muxer)
1932 { 2154 {
1933 int ret = 0; 2155 int ret = 0;
1934 2156
@@ -1941,6 +2163,11 @@ int consume(TSMessage* msg) @@ -1941,6 +2163,11 @@ int consume(TSMessage* msg)
1941 char* last = msg->packet_data + msg->packet_data_size; 2163 char* last = msg->packet_data + msg->packet_data_size;
1942 2164
1943 if (!msg->is_video()) { 2165 if (!msg->is_video()) {
  2166 + // write AAC raw audio.
  2167 + if (aac_muxer && (ret = aac_muxer->write_audio((char*)msg->packet_data, msg->packet_data_size)) != 0) {
  2168 + return ret;
  2169 + }
  2170 +
1944 // parse AAC audio. 2171 // parse AAC audio.
1945 while (p < last) { 2172 while (p < last) {
1946 TSAacAdts aac; 2173 TSAacAdts aac;
@@ -1949,7 +2176,35 @@ int consume(TSMessage* msg) @@ -1949,7 +2176,35 @@ int consume(TSMessage* msg)
1949 } 2176 }
1950 trace("ts+aac audio raw data parsed, size: %d, 0x%02x 0x%02x 0x%02x 0x%02x", 2177 trace("ts+aac audio raw data parsed, size: %d, 0x%02x 0x%02x 0x%02x 0x%02x",
1951 aac.size, aac.at(0), aac.at(1), aac.at(2), aac.at(3)); 2178 aac.size, aac.at(0), aac.at(1), aac.at(2), aac.at(3));
1952 - // TODO: process audio. 2179 +
  2180 + int sound_rate = 0;
  2181 + if (aac.sampling_frequency_index == TSAacSampleFrequency22050) {
  2182 + sound_rate = 0x02;
  2183 + } else if(aac.sampling_frequency_index == TSAacSampleFrequency44100) {
  2184 + sound_rate = 0x03;
  2185 + } else {
  2186 + // 0 = 5.5 kHz
  2187 + // 1 = 11 kHz
  2188 + // others.
  2189 + trace("ts+aac flv donot support sample-rate: %d", aac.sampling_frequency_index);
  2190 + return -1;
  2191 + }
  2192 +
  2193 + int sound_type = 0;
  2194 + if (aac.channel_configuration == 1) {
  2195 + // 0 = Mono sound
  2196 + sound_type = 0;
  2197 + } else if (aac.channel_configuration == 2) {
  2198 + // 1 = Stereo sound
  2199 + sound_type = 1;
  2200 + } else {
  2201 + trace("ts+aac flv donot support channel: %d", aac.channel_configuration);
  2202 + return -1;
  2203 + }
  2204 +
  2205 + if (flv && (ret = flv->write_audio((char*)aac.raw_data, aac.size, msg->pts, sound_rate, sound_type)) != 0) {
  2206 + return ret;
  2207 + }
1953 } 2208 }
1954 } else { 2209 } else {
1955 // parse H264 video. 2210 // parse H264 video.
@@ -1970,10 +2225,23 @@ int consume(TSMessage* msg) @@ -1970,10 +2225,23 @@ int consume(TSMessage* msg)
1970 int main(int /*argc*/, char** /*argv*/) 2225 int main(int /*argc*/, char** /*argv*/)
1971 { 2226 {
1972 const char* file = "livestream-1347.ts"; 2227 const char* file = "livestream-1347.ts";
1973 - //file = "nginx-rtmp-hls/livestream-1347-currupt.ts"; 2228 + const char* output_flv_file = "livestream.flv";
  2229 + const char* output_aac_file = "livestream.aac";
  2230 +
1974 int fd = open(file, O_RDONLY); 2231 int fd = open(file, O_RDONLY);
  2232 + FlvMuxer flv;
  2233 + AacMuxer aac_muxer;
1975 2234
1976 int ret = 0; 2235 int ret = 0;
  2236 + if ((ret = flv.open(output_flv_file)) != 0) {
  2237 + trace("flv+open open flv file failed.");
  2238 + return ret;
  2239 + }
  2240 + if ((ret = aac_muxer.open(output_aac_file)) != 0) {
  2241 + trace("aac_muxer+open open flv file failed.");
  2242 + return ret;
  2243 + }
  2244 +
1977 trace("demuxer+read packet count offset T+0 T+1 T+2 T+3 T+x T+L2 T+L1 T+L0"); 2245 trace("demuxer+read packet count offset T+0 T+1 T+2 T+3 T+x T+L2 T+L1 T+L0");
1978 2246
1979 TSContext ctx; 2247 TSContext ctx;
@@ -2012,7 +2280,7 @@ int main(int /*argc*/, char** /*argv*/) @@ -2012,7 +2280,7 @@ int main(int /*argc*/, char** /*argv*/)
2012 continue; 2280 continue;
2013 } 2281 }
2014 2282
2015 - if ((ret = consume(msg)) != 0) { 2283 + if ((ret = consume(msg, &flv, &aac_muxer)) != 0) {
2016 trace("demuxer+consume parse and consume message failed. ret=%d", ret); 2284 trace("demuxer+consume parse and consume message failed. ret=%d", ret);
2017 break; 2285 break;
2018 } 2286 }