From 671c89c3924b92313b71eb05b72bf7a693bd513a Mon Sep 17 00:00:00 2001
From: Carlos Rafael Giani <crg7475@mailbox.org>
Date: Sun, 8 Sep 2019 15:54:08 +0200
Subject: [PATCH] mpg123: Add gapless playback support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Sebastian Dröge <sebastian@centricular.com>
Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1028>
---
 .../ext/mpg123/gstmpg123audiodec.c            | 361 +++++++++++++-----
 .../ext/mpg123/gstmpg123audiodec.h            |   3 +
 .../gst/audioparsers/gstmpegaudioparse.c      |   2 +-
 .../tests/check/elements/mpg123audiodec.c     | 145 ++++++-
 ...09ms-1ch-32000hz-gapless-with-lame-tag.mp3 | Bin
 5 files changed, 403 insertions(+), 108 deletions(-)
 rename {tests => subprojects/gst-plugins-good/tests}/files/sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3 (100%)

diff --git a/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.c b/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.c
index dd7186ac84..83d159af89 100644
--- a/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.c
+++ b/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.c
@@ -71,17 +71,32 @@ GST_STATIC_PAD_TEMPLATE ("sink",
         "channels = (int) [ 1, 2 ], " "parsed = (boolean) true ")
     );
 
+typedef struct
+{
+  guint64 clip_start, clip_end;
+} GstMpg123AudioDecClipInfo;
+
+static void gst_mpg123_audio_dec_dispose (GObject * object);
 static gboolean gst_mpg123_audio_dec_start (GstAudioDecoder * dec);
 static gboolean gst_mpg123_audio_dec_stop (GstAudioDecoder * dec);
 static GstFlowReturn gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec
     * mpg123_decoder, unsigned char const *decoded_bytes,
-    size_t const num_decoded_bytes);
+    size_t num_decoded_bytes, guint64 clip_start, guint64 clip_end);
 static GstFlowReturn gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec,
     GstBuffer * input_buffer);
 static gboolean gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec,
     GstCaps * input_caps);
 static void gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard);
 
+static void gst_mpg123_audio_dec_push_clip_info
+    (GstMpg123AudioDec * mpg123_decoder, guint64 clip_start, guint64 clip_end);
+static void gst_mpg123_audio_dec_pop_oldest_clip_info (GstMpg123AudioDec *
+    mpg123_decoder, guint64 * clip_start, guint64 * clip_end);
+static void gst_mpg123_audio_dec_clear_clip_info_queue (GstMpg123AudioDec *
+    mpg123_decoder);
+static guint gst_mpg123_audio_dec_get_info_queue_size (GstMpg123AudioDec *
+    mpg123_decoder);
+
 G_DEFINE_TYPE (GstMpg123AudioDec, gst_mpg123_audio_dec, GST_TYPE_AUDIO_DECODER);
 GST_ELEMENT_REGISTER_DEFINE (mpg123audiodec, "mpg123audiodec",
     GST_RANK_MARGINAL, GST_TYPE_MPG123_AUDIO_DEC);
@@ -89,6 +104,7 @@ GST_ELEMENT_REGISTER_DEFINE (mpg123audiodec, "mpg123audiodec",
 static void
 gst_mpg123_audio_dec_class_init (GstMpg123AudioDecClass * klass)
 {
+  GObjectClass *object_class;
   GstAudioDecoderClass *base_class;
   GstElementClass *element_class;
   GstPadTemplate *src_template, *sink_template;
@@ -96,6 +112,7 @@ gst_mpg123_audio_dec_class_init (GstMpg123AudioDecClass * klass)
 
   GST_DEBUG_CATEGORY_INIT (mpg123_debug, "mpg123", 0, "mpg123 mp3 decoder");
 
+  object_class = G_OBJECT_CLASS (klass);
   base_class = GST_AUDIO_DECODER_CLASS (klass);
   element_class = GST_ELEMENT_CLASS (klass);
 
@@ -178,6 +195,7 @@ gst_mpg123_audio_dec_class_init (GstMpg123AudioDecClass * klass)
   gst_element_class_add_pad_template (element_class, sink_template);
   gst_element_class_add_pad_template (element_class, src_template);
 
+  object_class->dispose = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_dispose);
   base_class->start = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_start);
   base_class->stop = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_stop);
   base_class->handle_frame =
@@ -198,6 +216,9 @@ void
 gst_mpg123_audio_dec_init (GstMpg123AudioDec * mpg123_decoder)
 {
   mpg123_decoder->handle = NULL;
+  mpg123_decoder->audio_clip_info_queue =
+      gst_queue_array_new_for_struct (sizeof (GstMpg123AudioDecClipInfo), 16);
+
   gst_audio_decoder_set_needs_format (GST_AUDIO_DECODER (mpg123_decoder), TRUE);
   gst_audio_decoder_set_use_default_pad_acceptcaps (GST_AUDIO_DECODER_CAST
       (mpg123_decoder), TRUE);
@@ -205,6 +226,20 @@ gst_mpg123_audio_dec_init (GstMpg123AudioDec * mpg123_decoder)
 }
 
 
+static void
+gst_mpg123_audio_dec_dispose (GObject * object)
+{
+  GstMpg123AudioDec *self = GST_MPG123_AUDIO_DEC (object);
+
+  /* Free the clip info queue (if still present) and reset the pointer to
+   * NULL, so that a repeated dispose finds nothing left to free. */
+  g_clear_pointer (&self->audio_clip_info_queue, gst_queue_array_free);
+
+  /* Chain up to the parent class. */
+  G_OBJECT_CLASS (gst_mpg123_audio_dec_parent_class)->dispose (object);
+}
+
+
 static gboolean
 gst_mpg123_audio_dec_start (GstAudioDecoder * dec)
 {
@@ -271,6 +306,8 @@ gst_mpg123_audio_dec_stop (GstAudioDecoder * dec)
     mpg123_decoder->handle = NULL;
   }
 
+  gst_mpg123_audio_dec_clear_clip_info_queue (mpg123_decoder);
+
   GST_INFO_OBJECT (dec, "mpg123 decoder stopped");
 
   return TRUE;
@@ -279,7 +316,8 @@ gst_mpg123_audio_dec_stop (GstAudioDecoder * dec)
 
 static GstFlowReturn
 gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec * mpg123_decoder,
-    unsigned char const *decoded_bytes, size_t const num_decoded_bytes)
+    unsigned char const *decoded_bytes, size_t num_decoded_bytes,
+    guint64 clip_start, guint64 clip_end)
 {
   GstBuffer *output_buffer;
   GstAudioDecoder *dec;
@@ -287,15 +325,31 @@ gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec * mpg123_decoder,
   output_buffer = NULL;
   dec = GST_AUDIO_DECODER (mpg123_decoder);
 
-  if ((num_decoded_bytes == 0) || (decoded_bytes == NULL)) {
-    /* This occurs in the first few frames, which do not carry data; once
-     * MPG123_AUDIO_DEC_NEW_FORMAT is received, the empty frames stop occurring */
-    GST_DEBUG_OBJECT (mpg123_decoder,
-        "cannot decode yet, need more data -> no output buffer to push");
+  if (G_UNLIKELY ((num_decoded_bytes == 0) || (decoded_bytes == NULL))) {
+    /* This occurs in two cases:
+     *
+     * 1. The first few frames come in. These fill mpg123's buffers, and
+     *    do not immediately yield decoded output. This stops once
+     *    mpg123_decode_frame () returns MPG123_NEW_FORMAT.
+     * 2. The decoder is being drained.
+     */
     return GST_FLOW_OK;
   }
 
-  output_buffer = gst_buffer_new_allocate (NULL, num_decoded_bytes, NULL);
+  if (G_UNLIKELY ((clip_start + clip_end) >= num_decoded_bytes)) {
+    /* Fully-clipped frames are pure padding, but they were decoded and so
+     * must still be finished. Both clip amounts count towards the check. */
+    GST_LOG_OBJECT (mpg123_decoder, "frame is fully clipped; "
+        "not pushing anything downstream");
+    return gst_audio_decoder_finish_frame (dec, NULL, 1);
+  }
+
+  /* Apply clipping. */
+  decoded_bytes += clip_start;
+  num_decoded_bytes -= clip_start + clip_end;
+
+  output_buffer = gst_audio_decoder_allocate_output_buffer (dec,
+      num_decoded_bytes);
 
   if (output_buffer == NULL) {
     /* This is necessary to advance playback in time,
@@ -327,115 +381,193 @@ gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec,
   unsigned char *decoded_bytes;
   size_t num_decoded_bytes;
   GstFlowReturn retval;
+  gboolean loop = TRUE;
 
   mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
 
   g_assert (mpg123_decoder->handle != NULL);
 
-  /* The actual decoding */
-  {
-    /* feed input data (if there is any) */
-    if (G_LIKELY (input_buffer != NULL)) {
-      GstMapInfo info;
+  /* Feed input data (if there is any) into mpg123. */
+  if (G_LIKELY (input_buffer != NULL)) {
+    GstMapInfo info;
+    GstAudioClippingMeta *clipping_meta = NULL;
 
-      if (gst_buffer_map (input_buffer, &info, GST_MAP_READ)) {
-        mpg123_feed (mpg123_decoder->handle, info.data, info.size);
-        gst_buffer_unmap (input_buffer, &info);
-      } else {
-        GST_AUDIO_DECODER_ERROR (mpg123_decoder, 1, RESOURCE, READ, (NULL),
-            ("gst_memory_map() failed"), retval);
-        return retval;
-      }
+    /* Drop any Xing/LAME header as marked from the parser. It's not parsed in
+     * this element and would decode to unnecessary silence samples. */
+    if (GST_BUFFER_FLAG_IS_SET (input_buffer, GST_BUFFER_FLAG_DECODE_ONLY) &&
+        GST_BUFFER_FLAG_IS_SET (input_buffer, GST_BUFFER_FLAG_DROPPABLE)) {
+      return gst_audio_decoder_finish_frame (dec, NULL, 1);
+    } else if (gst_buffer_map (input_buffer, &info, GST_MAP_READ)) {
+      GST_LOG_OBJECT (mpg123_decoder, "got new MPEG audio frame with %"
+          G_GSIZE_FORMAT " byte(s); feeding it into mpg123", info.size);
+      mpg123_feed (mpg123_decoder->handle, info.data, info.size);
+      gst_buffer_unmap (input_buffer, &info);
+    } else {
+      GST_AUDIO_DECODER_ERROR (mpg123_decoder, 1, RESOURCE, READ, (NULL),
+          ("gst_memory_map() failed; could not feed MPEG frame into mpg123"),
+          retval);
+      return retval;
     }
 
+    clipping_meta = gst_buffer_get_audio_clipping_meta (input_buffer);
+    if (clipping_meta != NULL) {
+      if (clipping_meta->format == GST_FORMAT_DEFAULT) {
+        /* Get clipping info and convert it to bytes. */
+        gint bpf = GST_AUDIO_INFO_BPF (&(mpg123_decoder->next_audioinfo));
+        guint64 clip_start = clipping_meta->start * bpf;
+        guint64 clip_end = clipping_meta->end * bpf;
+
+        /* Push the clipping info into the queue. We cannot use clipping info
+         * directly since mpg123 might not immediately be able to decode this
+         * MPEG frame. In other words, it queues the frames internally. To
+         * make sure we apply clipping properly, we therefore also have to
+         * queue the clipping info accordingly. */
+        gst_mpg123_audio_dec_push_clip_info (mpg123_decoder, clip_start,
+            clip_end);
+
+        GST_LOG_OBJECT (dec, "buffer has clipping metadata: start/end %"
+            G_GUINT64_FORMAT "/%" G_GUINT64_FORMAT " samples (= %"
+            G_GUINT64_FORMAT "/%" G_GUINT64_FORMAT " bytes); pushed it into "
+            "audio clip info queue (now has %u item(s))", clipping_meta->start,
+            clipping_meta->end, clip_start, clip_end,
+            gst_mpg123_audio_dec_get_info_queue_size (mpg123_decoder));
+      } else {
+        gst_mpg123_audio_dec_push_clip_info (mpg123_decoder, 0, 0);
+        GST_WARNING_OBJECT (dec,
+            "buffer has clipping metadata in unsupported format %s",
+            gst_format_get_name (clipping_meta->format));
+      }
+    } else {
+      gst_mpg123_audio_dec_push_clip_info (mpg123_decoder, 0, 0);
+    }
+  } else {
+    GST_LOG_OBJECT (dec, "got NULL pointer as input; "
+        "will drain mpg123 decoder");
+  }
+
+  retval = GST_FLOW_OK;
+
+  /* Keep trying to decode with mpg123 until it reports that
+   * it is done, needs more data, or an error occurs. */
+  while (loop) {
+    guint64 clip_start = 0, clip_end = 0;
+
     /* Try to decode a frame */
     decoded_bytes = NULL;
     num_decoded_bytes = 0;
     decode_error = mpg123_decode_frame (mpg123_decoder->handle,
         &mpg123_decoder->frame_offset, &decoded_bytes, &num_decoded_bytes);
-  }
 
-  retval = GST_FLOW_OK;
+    if (G_LIKELY (decoded_bytes != NULL)) {
+      gst_mpg123_audio_dec_pop_oldest_clip_info (mpg123_decoder, &clip_start,
+          &clip_end);
 
-  switch (decode_error) {
-    case MPG123_NEW_FORMAT:
-      /* As mentioned in gst_mpg123_audio_dec_set_format(), the next audioinfo
-       * is not set immediately; instead, the code waits for mpg123 to take
-       * note of the new format, and then sets the audioinfo. This fixes glitches
-       * with mp3s containing several format headers (for example, first half
-       * using 44.1kHz, second half 32 kHz) */
-
-      GST_LOG_OBJECT (dec,
-          "mpg123 reported a new format -> setting next srccaps");
-
-      gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
-          num_decoded_bytes);
-
-      /* If there is a next audioinfo, use it, then set has_next_audioinfo to
-       * FALSE, to make sure gst_audio_decoder_set_output_format() isn't called
-       * again until set_format is called by the base class */
-      if (mpg123_decoder->has_next_audioinfo) {
-        if (!gst_audio_decoder_set_output_format (dec,
-                &(mpg123_decoder->next_audioinfo))) {
-          GST_WARNING_OBJECT (dec, "Unable to set output format");
-          retval = GST_FLOW_NOT_NEGOTIATED;
-        }
-        mpg123_decoder->has_next_audioinfo = FALSE;
+      if ((clip_start + clip_end) > 0) {
+        GST_LOG_OBJECT (dec, "retrieved clip info from queue; "
+            "will clip %" G_GUINT64_FORMAT " byte(s) at the start and %"
+            G_GUINT64_FORMAT " at the end of the decoded frame; queue now "
+            "has %u item(s)", clip_start, clip_end,
+            gst_mpg123_audio_dec_get_info_queue_size (mpg123_decoder));
       }
 
-      break;
-
-    case MPG123_NEED_MORE:
-    case MPG123_OK:
-      retval = gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder,
-          decoded_bytes, num_decoded_bytes);
-      break;
-
-    case MPG123_DONE:
-      /* If this happens, then the upstream parser somehow missed the ending
-       * of the bitstream */
-      GST_LOG_OBJECT (dec, "mpg123 is done decoding");
-      gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
+      GST_LOG_OBJECT (dec, "decoded %" G_GSIZE_FORMAT " byte(s)", (gsize)
           num_decoded_bytes);
-      retval = GST_FLOW_EOS;
-      break;
+    }
 
-    default:
-    {
-      /* Anything else is considered an error */
-      int errcode;
-      retval = GST_FLOW_ERROR;  /* use error by default */
-      switch (decode_error) {
-        case MPG123_ERR:
-          errcode = mpg123_errcode (mpg123_decoder->handle);
-          break;
-        default:
-          errcode = decode_error;
-      }
-      switch (errcode) {
-        case MPG123_BAD_OUTFORMAT:{
-          GstCaps *input_caps =
-              gst_pad_get_current_caps (GST_AUDIO_DECODER_SINK_PAD (dec));
-          GST_ELEMENT_ERROR (dec, STREAM, FORMAT, (NULL),
-              ("Output sample format could not be used when trying to decode frame. "
-                  "This is typically caused when the input caps (often the sample "
-                  "rate) do not match the actual format of the audio data. "
-                  "Input caps: %" GST_PTR_FORMAT, input_caps)
-              );
-          gst_caps_unref (input_caps);
-          break;
+    switch (decode_error) {
+      case MPG123_NEW_FORMAT:
+        /* As mentioned in gst_mpg123_audio_dec_set_format(), the next audioinfo
+         * is not set immediately; instead, the code waits for mpg123 to take
+         * note of the new format, and then sets the audioinfo. This fixes glitches
+         * with mp3s containing several format headers (for example, first half
+         * using 44.1kHz, second half 32 kHz) */
+
+        gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
+            num_decoded_bytes, clip_start, clip_end);
+
+        GST_LOG_OBJECT (dec,
+            "mpg123 reported a new format -> setting next srccaps");
+
+        /* If there is a next audioinfo, use it, then set has_next_audioinfo to
+         * FALSE, to make sure gst_audio_decoder_set_output_format() isn't called
+         * again until set_format is called by the base class */
+        if (mpg123_decoder->has_next_audioinfo) {
+          if (!gst_audio_decoder_set_output_format (dec,
+                  &(mpg123_decoder->next_audioinfo))) {
+            GST_WARNING_OBJECT (dec, "Unable to set output format");
+            retval = GST_FLOW_NOT_NEGOTIATED;
+            loop = FALSE;
+          }
+          mpg123_decoder->has_next_audioinfo = FALSE;
         }
-        default:{
-          char const *errmsg = mpg123_plain_strerror (errcode);
-          /* GST_AUDIO_DECODER_ERROR sets a new return value according to
-           * its estimations */
-          GST_AUDIO_DECODER_ERROR (mpg123_decoder, 1, STREAM, DECODE, (NULL),
-              ("mpg123 decoding error: %s", errmsg), retval);
+
+        break;
+
+      case MPG123_NEED_MORE:
+        loop = FALSE;
+        GST_LOG_OBJECT (dec, "mpg123 needs more data to continue decoding");
+        retval = gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder,
+            decoded_bytes, num_decoded_bytes, clip_start, clip_end);
+        break;
+
+      case MPG123_OK:
+        retval = gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder,
+            decoded_bytes, num_decoded_bytes, clip_start, clip_end);
+        break;
+
+      case MPG123_DONE:
+        /* If this happens, then the upstream parser somehow missed the ending
+         * of the bitstream */
+        gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
+            num_decoded_bytes, clip_start, clip_end);
+        GST_LOG_OBJECT (dec, "mpg123 is done decoding");
+        retval = GST_FLOW_EOS;
+        loop = FALSE;
+        break;
+
+      default:
+      {
+        /* Anything else is considered an error */
+        int errcode;
+
+        /* use error by default */
+        retval = GST_FLOW_ERROR;
+        loop = FALSE;
+
+        switch (decode_error) {
+          case MPG123_ERR:
+            errcode = mpg123_errcode (mpg123_decoder->handle);
+            break;
+          default:
+            errcode = decode_error;
+        }
+        switch (errcode) {
+          case MPG123_BAD_OUTFORMAT:{
+            GstCaps *input_caps =
+                gst_pad_get_current_caps (GST_AUDIO_DECODER_SINK_PAD (dec));
+            GST_ELEMENT_ERROR (dec, STREAM, FORMAT, (NULL),
+                ("Output sample format could not be used when trying to decode frame. "
+                    "This is typically caused when the input caps (often the sample "
+                    "rate) do not match the actual format of the audio data. "
+                    "Input caps: %" GST_PTR_FORMAT, (gpointer) input_caps)
+                );
+            gst_caps_unref (input_caps);
+            break;
+          }
+          default:{
+            char const *errmsg = mpg123_plain_strerror (errcode);
+            /* GST_AUDIO_DECODER_ERROR sets a new return value according to
+             * its estimations */
+            GST_AUDIO_DECODER_ERROR (mpg123_decoder, 1, STREAM, DECODE, (NULL),
+                ("mpg123 decoding error: %s", errmsg), retval);
+          }
         }
       }
     }
   }
 
+  GST_LOG_OBJECT (mpg123_decoder, "done handling frame");
+
   return retval;
 }
 
@@ -514,7 +646,7 @@ gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec, GstCaps * input_caps)
         format_str = g_value_get_string (format_value);
       } else {
         GST_ERROR_OBJECT (mpg123_decoder, "unexpected type for 'format' field "
-            "in caps structure %" GST_PTR_FORMAT, structure);
+            "in caps structure %" GST_PTR_FORMAT, (gpointer) structure);
         gst_caps_unref (allowed_srccaps);
         goto done;
       }
@@ -616,12 +748,55 @@ gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard)
   if (hard)
     mpg123_decoder->has_next_audioinfo = FALSE;
 
+  gst_mpg123_audio_dec_clear_clip_info_queue (mpg123_decoder);
+
   /* opening/closing feeds do not affect the format defined by the
    * mpg123_format() call that was made in gst_mpg123_audio_dec_set_format(),
    * and since the up/downstream caps are not expected to change here, no
    * mpg123_format() calls are done */
 }
 
+
+static void gst_mpg123_audio_dec_push_clip_info
+    (GstMpg123AudioDec * mpg123_decoder, guint64 clip_start, guint64 clip_end)
+{
+  GstMpg123AudioDecClipInfo clip_info = { clip_start, clip_end };
+  gst_queue_array_push_tail_struct (mpg123_decoder->audio_clip_info_queue,
+      &clip_info);
+}
+
+
+static void
+gst_mpg123_audio_dec_pop_oldest_clip_info (GstMpg123AudioDec *
+    mpg123_decoder, guint64 * clip_start, guint64 * clip_end)
+{
+  GstMpg123AudioDecClipInfo *oldest;
+
+  /* Empty queue: leave the caller's values untouched. */
+  if (gst_mpg123_audio_dec_get_info_queue_size (mpg123_decoder) == 0)
+    return;
+
+  /* Entries are pushed at the tail, so the head is the oldest one. */
+  oldest =
+      gst_queue_array_pop_head_struct (mpg123_decoder->audio_clip_info_queue);
+
+  *clip_start = oldest->clip_start;
+  *clip_end = oldest->clip_end;
+}
+
+static void
+gst_mpg123_audio_dec_clear_clip_info_queue (GstMpg123AudioDec * mpg123_decoder)
+{
+  gst_queue_array_clear (mpg123_decoder->audio_clip_info_queue);
+}
+
+
+static guint
+gst_mpg123_audio_dec_get_info_queue_size (GstMpg123AudioDec * mpg123_decoder)
+{
+  return gst_queue_array_get_length (mpg123_decoder->audio_clip_info_queue);
+}
+
 static gboolean
 plugin_init (GstPlugin * plugin)
 {
diff --git a/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.h b/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.h
index e6c316bb2d..2da140d718 100644
--- a/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.h
+++ b/subprojects/gst-plugins-good/ext/mpg123/gstmpg123audiodec.h
@@ -20,6 +20,7 @@
 #define __GST_MPG123_AUDIO_DEC_H__
 
 #include <gst/gst.h>
+#include <gst/base/base.h>
 #include <gst/audio/gstaudiodecoder.h>
 #include <mpg123.h>
 
@@ -40,6 +41,8 @@ struct _GstMpg123AudioDec
   gboolean has_next_audioinfo;
 
   off_t frame_offset;
+
+  GstQueueArray *audio_clip_info_queue;
 };
 
 GST_ELEMENT_REGISTER_DECLARE (mpg123audiodec);
diff --git a/subprojects/gst-plugins-good/gst/audioparsers/gstmpegaudioparse.c b/subprojects/gst-plugins-good/gst/audioparsers/gstmpegaudioparse.c
index 521ed7ec3c..2165589636 100644
--- a/subprojects/gst-plugins-good/gst/audioparsers/gstmpegaudioparse.c
+++ b/subprojects/gst-plugins-good/gst/audioparsers/gstmpegaudioparse.c
@@ -98,7 +98,7 @@
  * backwards compatibility with older hardware MP3 players, but can be safely
  * dropped.
  *
- * For more about Xng header frames, see:
+ * For more about Xing header frames, see:
  * https://www.codeproject.com/Articles/8295/MPEG-Audio-Frame-Header#XINGHeader
  * https://www.compuphase.com/mp3/mp3loops.htm#PADDING_DELAYS
  *
diff --git a/subprojects/gst-plugins-good/tests/check/elements/mpg123audiodec.c b/subprojects/gst-plugins-good/tests/check/elements/mpg123audiodec.c
index 20d6e779dd..b163cd156e 100644
--- a/subprojects/gst-plugins-good/tests/check/elements/mpg123audiodec.c
+++ b/subprojects/gst-plugins-good/tests/check/elements/mpg123audiodec.c
@@ -42,6 +42,7 @@ static GstPad *mysrcpad, *mysinkpad;
 #define MP2_STREAM_FILENAME "stream.mp2"
 #define MP3_CBR_STREAM_FILENAME "cbr_stream.mp3"
 #define MP3_VBR_STREAM_FILENAME "vbr_stream.mp3"
+#define MP3_GAPLESS_STREAM_FILENAME "sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3"
 
 
 /* mpeg 1 layer 2 stream created with:
@@ -220,7 +221,7 @@ setup_mpeg1layer2dec (void)
 }
 
 static GstElement *
-setup_mpeg1layer3dec (void)
+setup_mpeg1layer3dec (gint sample_rate)
 {
   GstElement *mpg123audiodec;
   GstCaps *caps;
@@ -237,7 +238,7 @@ setup_mpeg1layer3dec (void)
   caps = gst_caps_new_simple ("audio/mpeg",
       "mpegversion", G_TYPE_INT, 1,
       "layer", G_TYPE_INT, 3,
-      "rate", G_TYPE_INT, 44100,
+      "rate", G_TYPE_INT, sample_rate,
       "channels", G_TYPE_INT, 1, "parsed", G_TYPE_BOOLEAN, TRUE, NULL);
   gst_check_setup_events (mysrcpad, mpg123audiodec, caps, GST_FORMAT_TIME);
   gst_caps_unref (caps);
@@ -300,7 +301,7 @@ run_decoding_test (GstElement * mpg123audiodec, gchar const *filename)
 
     /* This is done to be on the safe side - docs say lifetime of the input buffer
      * depends *solely* on the sample */
-    input_buffer = gst_buffer_copy (input_buffer);
+    input_buffer = gst_buffer_ref (input_buffer);
 
     fail_unless_equals_int (gst_pad_push (mysrcpad, input_buffer), GST_FLOW_OK);
 
@@ -312,7 +313,7 @@ run_decoding_test (GstElement * mpg123audiodec, gchar const *filename)
   num_decoded_buffers = g_list_length (buffers);
 
   /* check number of decoded buffers */
-  fail_unless_equals_int (num_decoded_buffers, num_input_buffers - 2);
+  fail_unless_equals_int (num_decoded_buffers, num_input_buffers);
 
   caps = gst_pad_get_current_caps (mysinkpad);
   GST_LOG ("output caps %" GST_PTR_FORMAT, caps);
@@ -333,6 +334,7 @@ run_decoding_test (GstElement * mpg123audiodec, gchar const *filename)
   /* here, test if decoded data is a sine tone, and if the sine frequency is at the
    * right spot in the spectrum */
   for (i = 0; i < num_decoded_buffers; ++i) {
+    fail_if (buffers == NULL);
     outbuffer = GST_BUFFER (buffers->data);
     fail_if (outbuffer == NULL, "Invalid buffer retrieved");
 
@@ -342,13 +344,12 @@ run_decoding_test (GstElement * mpg123audiodec, gchar const *filename)
 
     check_main_frequency_spot_S32 (outbuffer, expected_frequency_spot);
 
-    buffers = g_list_remove (buffers, outbuffer);
+    buffers = g_list_delete_link (buffers, buffers);
     gst_buffer_unref (outbuffer);
     outbuffer = NULL;
   }
 
-  g_list_free (buffers);
-  buffers = NULL;
+  fail_unless (buffers == NULL);
 
   cleanup_input_pipeline (input_pipeline);
   gst_bus_set_flushing (bus, TRUE);
@@ -372,7 +373,7 @@ GST_END_TEST;
 GST_START_TEST (test_decode_mpeg1layer3_cbr)
 {
   GstElement *mpg123audiodec;
-  mpg123audiodec = setup_mpeg1layer3dec ();
+  mpg123audiodec = setup_mpeg1layer3dec (44100);
   run_decoding_test (mpg123audiodec, MP3_CBR_STREAM_FILENAME);
   cleanup_mpg123audiodec (mpg123audiodec);
 }
@@ -383,7 +384,7 @@ GST_END_TEST;
 GST_START_TEST (test_decode_mpeg1layer3_vbr)
 {
   GstElement *mpg123audiodec;
-  mpg123audiodec = setup_mpeg1layer3dec ();
+  mpg123audiodec = setup_mpeg1layer3dec (44100);
   run_decoding_test (mpg123audiodec, MP3_VBR_STREAM_FILENAME);
   cleanup_mpg123audiodec (mpg123audiodec);
 }
@@ -391,6 +392,117 @@ GST_START_TEST (test_decode_mpeg1layer3_vbr)
 GST_END_TEST;
 
 
+GST_START_TEST (test_decode_mpeg1layer3_gapless)
+{
+  GstBus *bus;
+  guint num_decoded_buffers;
+  guint num_decoded_pcm_frames;
+  GstCaps *out_caps, *caps;
+  GstAudioInfo audioinfo;
+  GstElement *input_pipeline, *input_appsink;
+  int i;
+  GstBuffer *outbuffer;
+  GstElement *mpg123audiodec;
+
+  /* 440 Hz = frequency of sine wave in audio data
+   * 32000 Hz = sample rate
+   * (32000 / 2) Hz = Nyquist frequency */
+  static double const expected_frequency_spot = 440.0 / (32000.0 / 2.0);
+
+  mpg123audiodec = setup_mpeg1layer3dec (32000);
+
+  fail_unless (gst_element_set_state (mpg123audiodec,
+          GST_STATE_PLAYING) == GST_STATE_CHANGE_SUCCESS,
+      "could not set to playing");
+  bus = gst_bus_new ();
+
+  gst_element_set_bus (mpg123audiodec, bus);
+
+  setup_input_pipeline (MP3_GAPLESS_STREAM_FILENAME, &input_pipeline,
+      &input_appsink);
+
+  while (TRUE) {
+    GstSample *sample;
+    GstBuffer *input_buffer;
+
+    sample = gst_app_sink_pull_sample (GST_APP_SINK (input_appsink));
+    if (sample == NULL)
+      break;
+
+    fail_unless (GST_IS_SAMPLE (sample));
+
+    input_buffer = gst_sample_get_buffer (sample);
+    fail_if (input_buffer == NULL);
+
+    /* This is done to be on the safe side - docs say lifetime of the input buffer
+     * depends *solely* on the sample */
+    input_buffer = gst_buffer_ref (input_buffer);
+
+    fail_unless_equals_int (gst_pad_push (mysrcpad, input_buffer), GST_FLOW_OK);
+
+    gst_sample_unref (sample);
+  }
+
+  num_decoded_buffers = g_list_length (buffers);
+
+  caps = gst_pad_get_current_caps (mysinkpad);
+  GST_LOG ("output caps %" GST_PTR_FORMAT, caps);
+  fail_unless (gst_audio_info_from_caps (&audioinfo, caps),
+      "Getting audio info from caps failed");
+
+  /* check caps */
+  out_caps = gst_caps_new_simple ("audio/x-raw",
+      "format", G_TYPE_STRING, GST_AUDIO_NE (S32),
+      "layout", G_TYPE_STRING, "interleaved",
+      "rate", G_TYPE_INT, 32000, "channels", G_TYPE_INT, 1, NULL);
+
+  fail_unless (gst_caps_is_equal_fixed (caps, out_caps), "Incorrect out caps");
+
+  gst_caps_unref (out_caps);
+  gst_caps_unref (caps);
+
+  /* This is the main check. We see how many PCM frames got decoded
+   * in total. If the amount is not what we expected, then gapless
+   * decoding failed, because padding samples have to be omitted
+   * in order for the playback to be really gapless. */
+  num_decoded_pcm_frames = 0;
+  for (i = 0; i < num_decoded_buffers; ++i) {
+    guint num_frames;
+
+    fail_if (buffers == NULL);
+    outbuffer = GST_BUFFER (buffers->data);
+    fail_if (outbuffer == NULL, "Invalid buffer retrieved");
+
+    num_frames =
+        gst_buffer_get_size (outbuffer) / GST_AUDIO_INFO_BPF (&audioinfo);
+    num_decoded_pcm_frames += num_frames;
+
+    /* Don't check the first frame for a sine wave, because it will
+     * unavoidably have a discontinuity at the beginning, causing the
+     * spectrum to be filled with additional peaks, so the FFT check
+     * will detect false positives. */
+    if (i != 0)
+      check_main_frequency_spot_S32 (outbuffer, expected_frequency_spot);
+
+    buffers = g_list_delete_link (buffers, buffers);
+    gst_buffer_unref (outbuffer);
+    outbuffer = NULL;
+  }
+
+  fail_unless_equals_int (num_decoded_pcm_frames, 32288);
+  fail_unless (buffers == NULL);
+
+  cleanup_input_pipeline (input_pipeline);
+  gst_bus_set_flushing (bus, TRUE);
+  gst_element_set_bus (mpg123audiodec, NULL);
+  gst_object_unref (GST_OBJECT (bus));
+
+  cleanup_mpg123audiodec (mpg123audiodec);
+}
+
+GST_END_TEST;
+
+
 GST_START_TEST (test_decode_garbage_mpeg1layer2)
 {
   GstElement *mpg123audiodec;
@@ -446,7 +558,7 @@ GST_START_TEST (test_decode_garbage_mpeg1layer3)
   int i, num_buffers;
   guint32 *tmpbuf;
 
-  mpg123audiodec = setup_mpeg1layer3dec ();
+  mpg123audiodec = setup_mpeg1layer3dec (44100);
 
   fail_unless (gst_element_set_state (mpg123audiodec,
           GST_STATE_PLAYING) == GST_STATE_CHANGE_SUCCESS,
@@ -490,14 +602,17 @@ is_test_file_available (gchar const *filename)
 {
   gboolean ret;
   gchar *full_filename;
-  gchar *cwd;
 
-  cwd = g_get_current_dir ();
-  full_filename = g_build_filename (cwd, GST_TEST_FILES_PATH, filename, NULL);
+  if (g_path_is_absolute (GST_TEST_FILES_PATH)) {
+    full_filename = g_build_filename (GST_TEST_FILES_PATH, filename, NULL);
+  } else {
+    gchar *cwd = g_get_current_dir ();
+    full_filename = g_build_filename (cwd, GST_TEST_FILES_PATH, filename, NULL);
+    g_free (cwd);
+  }
   ret =
       g_file_test (full_filename, G_FILE_TEST_IS_REGULAR | G_FILE_TEST_EXISTS);
   g_free (full_filename);
-  g_free (cwd);
   return ret;
 }
 
@@ -523,6 +638,8 @@ mpg123audiodec_suite (void)
       tcase_add_test (tc_chain, test_decode_mpeg1layer3_cbr);
     if (is_test_file_available (MP3_VBR_STREAM_FILENAME))
       tcase_add_test (tc_chain, test_decode_mpeg1layer3_vbr);
+    if (is_test_file_available (MP3_GAPLESS_STREAM_FILENAME))
+      tcase_add_test (tc_chain, test_decode_mpeg1layer3_gapless);
   }
   tcase_add_test (tc_chain, test_decode_garbage_mpeg1layer2);
   tcase_add_test (tc_chain, test_decode_garbage_mpeg1layer3);
diff --git a/tests/files/sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3 b/subprojects/gst-plugins-good/tests/files/sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3
similarity index 100%
rename from tests/files/sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3
rename to subprojects/gst-plugins-good/tests/files/sine-1009ms-1ch-32000hz-gapless-with-lame-tag.mp3