From 5877d945a447d9e44760b93e68b99525cf5d4f87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Dr=C3=B6ge?= <sebastian@centricular.com>
Date: Mon, 13 Jan 2020 11:58:12 +0200
Subject: [PATCH] qtdemux: Always prefer information from v1/v2 sound sample
 description over sample description entry

ffmpeg is doing the same and various files in the wild have bogus
information in the sample description if the same information is also
duplicated afterwards in the v1/v2 sound sample desription.

Previously we only did this for non-raw audio due to
  https://bugzilla.gnome.org/show_bug.cgi?id=374914
but this specific file is already worked around differently. It still
works after this change.

Also remove ad-hoc GST_READ_DOUBLE_BE re-implementation and move the
switch for legacy audio formats after reading all the sample
descriptions as we want to override the values from there.
---
 gst/isomp4/qtdemux.c | 126 ++++++++++++++++++++-----------------------
 1 file changed, 58 insertions(+), 68 deletions(-)

diff --git a/gst/isomp4/qtdemux.c b/gst/isomp4/qtdemux.c
index ff2345837b..086bd92c1e 100644
--- a/gst/isomp4/qtdemux.c
+++ b/gst/isomp4/qtdemux.c
@@ -11717,6 +11717,64 @@ qtdemux_parse_trak (GstQTDemux * qtdemux, GNode * trak)
       entry->bytes_per_packet = entry->bytes_per_sample;
 
       offset = 36;
+
+      if (version == 0x00010000) {
+        /* sample description entry (16) + sound sample description v1 (20+16) */
+        if (len < 52)
+          goto corrupt_file;
+
+        /* take information from here over the normal sample description */
+        entry->samples_per_packet = QT_UINT32 (stsd_entry_data + offset);
+        entry->bytes_per_packet = QT_UINT32 (stsd_entry_data + offset + 4);
+        entry->bytes_per_frame = QT_UINT32 (stsd_entry_data + offset + 8);
+        entry->bytes_per_sample = QT_UINT32 (stsd_entry_data + offset + 12);
+
+        GST_LOG_OBJECT (qtdemux, "Sound sample description Version 1");
+        GST_LOG_OBJECT (qtdemux, "samples/packet:   %d",
+            entry->samples_per_packet);
+        GST_LOG_OBJECT (qtdemux, "bytes/packet:     %d",
+            entry->bytes_per_packet);
+        GST_LOG_OBJECT (qtdemux, "bytes/frame:      %d",
+            entry->bytes_per_frame);
+        GST_LOG_OBJECT (qtdemux, "bytes/sample:     %d",
+            entry->bytes_per_sample);
+
+        if (!entry->sampled && entry->bytes_per_packet) {
+          entry->samples_per_frame = (entry->bytes_per_frame /
+              entry->bytes_per_packet) * entry->samples_per_packet;
+          GST_LOG_OBJECT (qtdemux, "samples/frame:    %d",
+              entry->samples_per_frame);
+        }
+      } else if (version == 0x00020000) {
+        /* sample description entry (16) + sound sample description v2 (56) */
+        if (len < 72)
+          goto corrupt_file;
+
+        /* take information from here over the normal sample description */
+        entry->rate = GST_READ_DOUBLE_BE (stsd_entry_data + offset + 4);
+        entry->n_channels = QT_UINT32 (stsd_entry_data + offset + 12);
+        entry->samples_per_frame = entry->n_channels;
+        entry->bytes_per_sample = QT_UINT32 (stsd_entry_data + offset + 20) / 8;
+        entry->bytes_per_packet = QT_UINT32 (stsd_entry_data + offset + 28);
+        entry->samples_per_packet = QT_UINT32 (stsd_entry_data + offset + 32);
+        entry->bytes_per_frame = entry->bytes_per_sample * entry->n_channels;
+
+        GST_LOG_OBJECT (qtdemux, "Sound sample description Version 2");
+        GST_LOG_OBJECT (qtdemux, "sample rate:        %g", entry->rate);
+        GST_LOG_OBJECT (qtdemux, "n_channels:         %d", entry->n_channels);
+        GST_LOG_OBJECT (qtdemux, "bits/channel:       %d",
+            entry->bytes_per_sample * 8);
+        GST_LOG_OBJECT (qtdemux, "format flags:       %X",
+            QT_UINT32 (stsd_entry_data + offset + 24));
+        GST_LOG_OBJECT (qtdemux, "bytes/packet:       %d",
+            entry->bytes_per_packet);
+        GST_LOG_OBJECT (qtdemux, "LPCM frames/packet: %d",
+            entry->samples_per_packet);
+      } else if (version != 0x00000) {
+        GST_WARNING_OBJECT (qtdemux, "unknown audio STSD version %08x",
+            version);
+      }
+
       switch (fourcc) {
           /* Yes, these have to be hard-coded */
         case FOURCC_MAC6:
@@ -11769,74 +11827,6 @@ qtdemux_parse_trak (GstQTDemux * qtdemux, GNode * trak)
           break;
       }
 
-      if (version == 0x00010000) {
-        /* sample description entry (16) + sound sample description v1 (20+16) */
-        if (len < 52)
-          goto corrupt_file;
-
-        switch (fourcc) {
-          case FOURCC_twos:
-          case FOURCC_sowt:
-          case FOURCC_raw_:
-          case FOURCC_lpcm:
-            break;
-          default:
-          {
-            /* only parse extra decoding config for non-pcm audio */
-            entry->samples_per_packet = QT_UINT32 (stsd_entry_data + offset);
-            entry->bytes_per_packet = QT_UINT32 (stsd_entry_data + offset + 4);
-            entry->bytes_per_frame = QT_UINT32 (stsd_entry_data + offset + 8);
-            entry->bytes_per_sample = QT_UINT32 (stsd_entry_data + offset + 12);
-
-            GST_LOG_OBJECT (qtdemux, "samples/packet:   %d",
-                entry->samples_per_packet);
-            GST_LOG_OBJECT (qtdemux, "bytes/packet:     %d",
-                entry->bytes_per_packet);
-            GST_LOG_OBJECT (qtdemux, "bytes/frame:      %d",
-                entry->bytes_per_frame);
-            GST_LOG_OBJECT (qtdemux, "bytes/sample:     %d",
-                entry->bytes_per_sample);
-
-            if (!entry->sampled && entry->bytes_per_packet) {
-              entry->samples_per_frame = (entry->bytes_per_frame /
-                  entry->bytes_per_packet) * entry->samples_per_packet;
-              GST_LOG_OBJECT (qtdemux, "samples/frame:    %d",
-                  entry->samples_per_frame);
-            }
-            break;
-          }
-        }
-      } else if (version == 0x00020000) {
-        union
-        {
-          gdouble fp;
-          guint64 val;
-        } qtfp;
-
-        /* sample description entry (16) + sound sample description v2 (56) */
-        if (len < 72)
-          goto corrupt_file;
-
-        qtfp.val = QT_UINT64 (stsd_entry_data + offset + 4);
-        entry->rate = qtfp.fp;
-        entry->n_channels = QT_UINT32 (stsd_entry_data + offset + 12);
-
-        GST_LOG_OBJECT (qtdemux, "Sound sample description Version 2");
-        GST_LOG_OBJECT (qtdemux, "sample rate:        %g", entry->rate);
-        GST_LOG_OBJECT (qtdemux, "n_channels:         %d", entry->n_channels);
-        GST_LOG_OBJECT (qtdemux, "bits/channel:       %d",
-            QT_UINT32 (stsd_entry_data + offset + 20));
-        GST_LOG_OBJECT (qtdemux, "format flags:       %X",
-            QT_UINT32 (stsd_entry_data + offset + 24));
-        GST_LOG_OBJECT (qtdemux, "bytes/packet:       %d",
-            QT_UINT32 (stsd_entry_data + offset + 28));
-        GST_LOG_OBJECT (qtdemux, "LPCM frames/packet: %d",
-            QT_UINT32 (stsd_entry_data + offset + 32));
-      } else if (version != 0x00000) {
-        GST_WARNING_OBJECT (qtdemux, "unknown audio STSD version %08x",
-            version);
-      }
-
       if (entry->caps)
         gst_caps_unref (entry->caps);