Fangjun Kuang
Committed by GitHub

Fix reading wave files with metadata (#21)

@@ -25,7 +25,6 @@ @@ -25,7 +25,6 @@
25 #include <vector> 25 #include <vector>
26 26
27 namespace sherpa_onnx { 27 namespace sherpa_onnx {
28 -  
29 namespace { 28 namespace {
30 // see http://soundfile.sapp.org/doc/WaveFormat/ 29 // see http://soundfile.sapp.org/doc/WaveFormat/
31 // 30 //
@@ -33,10 +32,10 @@ namespace { @@ -33,10 +32,10 @@ namespace {
33 // TODO(fangjun): Support big endian 32 // TODO(fangjun): Support big endian
34 struct WaveHeader { 33 struct WaveHeader {
35 void Validate() const { 34 void Validate() const {
36 - // F F I R 35 + // F F I R
37 assert(chunk_id == 0x46464952); 36 assert(chunk_id == 0x46464952);
38 assert(chunk_size == 36 + subchunk2_size); 37 assert(chunk_size == 36 + subchunk2_size);
39 - // E V A W 38 + // E V A W
40 assert(format == 0x45564157); 39 assert(format == 0x45564157);
41 assert(subchunk1_id == 0x20746d66); 40 assert(subchunk1_id == 0x20746d66);
42 assert(subchunk1_size == 16); // 16 for PCM 41 assert(subchunk1_size == 16); // 16 for PCM
@@ -47,6 +46,22 @@ struct WaveHeader { @@ -47,6 +46,22 @@ struct WaveHeader {
47 assert(bits_per_sample == 16); // we support only 16 bits per sample 46 assert(bits_per_sample == 16); // we support only 16 bits per sample
48 } 47 }
49 48
  49 + // See
  50 + // https://en.wikipedia.org/wiki/WAV#Metadata
  51 + // and
  52 + // https://www.robotplanet.dk/audio/wav_meta_data/riff_mci.pdf
  53 + void SeekToDataChunk(std::istream &is) {
  54 + // a t a d
  55 + while (subchunk2_id != 0x61746164) {
  56 + // const char *p = reinterpret_cast<const char *>(&subchunk2_id);
  57 + // printf("Skip chunk (%x): %c%c%c%c of size: %d\n", subchunk2_id, p[0],
  58 + // p[1], p[2], p[3], subchunk2_size);
  59 + is.seekg(subchunk2_size, std::istream::cur);
  60 + is.read(reinterpret_cast<char *>(&subchunk2_id), sizeof(int32_t));
  61 + is.read(reinterpret_cast<char *>(&subchunk2_size), sizeof(int32_t));
  62 + }
  63 + }
  64 +
50 int32_t chunk_id; 65 int32_t chunk_id;
51 int32_t chunk_size; 66 int32_t chunk_size;
52 int32_t format; 67 int32_t format;
@@ -58,8 +73,8 @@ struct WaveHeader { @@ -58,8 +73,8 @@ struct WaveHeader {
58 int32_t byte_rate; 73 int32_t byte_rate;
59 int16_t block_align; 74 int16_t block_align;
60 int16_t bits_per_sample; 75 int16_t bits_per_sample;
61 - int32_t subchunk2_id;  
62 - int32_t subchunk2_size; 76 + int32_t subchunk2_id; // a tag of this chunk
  77 + int32_t subchunk2_size; // size of subchunk2
63 }; 78 };
64 static_assert(sizeof(WaveHeader) == 44, ""); 79 static_assert(sizeof(WaveHeader) == 44, "");
65 80
@@ -69,9 +84,10 @@ std::vector<float> ReadWaveImpl(std::istream &is, float *sample_rate) { @@ -69,9 +84,10 @@ std::vector<float> ReadWaveImpl(std::istream &is, float *sample_rate) {
69 WaveHeader header; 84 WaveHeader header;
70 is.read(reinterpret_cast<char *>(&header), sizeof(header)); 85 is.read(reinterpret_cast<char *>(&header), sizeof(header));
71 assert(static_cast<bool>(is)); 86 assert(static_cast<bool>(is));
72 -  
73 header.Validate(); 87 header.Validate();
74 88
  89 + header.SeekToDataChunk(is);
  90 +
75 *sample_rate = header.sample_rate; 91 *sample_rate = header.sample_rate;
76 92
77 // header.subchunk2_size contains the number of bytes in the data. 93 // header.subchunk2_size contains the number of bytes in the data.