Example pipelines

/*  MP3 decoding plugin for GStreamer using the mpg123 library
 *  Copyright (C) 2012 Carlos Rafael Giani
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2.1 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */

/**
 * SECTION: element-mpg123audiodec
 * @see_also: lamemp3enc, mad
 *
 * Audio decoder for MPEG-1 layer 1/2/3 audio data.
 *
 * <refsect2>
 * <title>Example pipelines</title>
 * |[
 * gst-launch filesrc location=music.mp3 ! mpegaudioparse ! mpg123audiodec ! audioconvert ! audioresample ! autoaudiosink
 * ]| Decode and play the mp3 file
 * </refsect2>
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "gstmpg123audiodec.h"

#include <stdlib.h>
#include <string.h>

GST_DEBUG_CATEGORY_STATIC (mpg123_debug);
#define GST_CAT_DEFAULT mpg123_debug

/* Omitted sample formats that mpg123 supports (or at least can support):
 *  - 8bit integer signed
 *  - 8bit integer unsigned
 *  - a-law
 *  - mu-law
 *  - 64bit float
 *
 * The first four formats are not supported by the GstAudioDecoder base class.
 * (The internal gst_audio_format_from_caps_structure() call fails.)
 *
 * The 64bit float issue is tricky. mpg123 actually decodes to "real",
 * not necessarily to "float".
 *
 * "real" can be fixed point, 32bit float, 64bit float. There seems to be
 * no way how to find out which one of them is actually used.
 *
 * However, in all known installations, "real" equals 32bit float, so that's
 * what is used. */

static GstStaticPadTemplate static_sink_template =
GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/mpeg, "
        "mpegversion = (int) { 1 }, "
        "layer = (int) [ 1, 3 ], "
        "rate = (int) { 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000 }, "
        "channels = (int) [ 1, 2 ], " "parsed = (boolean) true ")
    );

static gboolean gst_mpg123_audio_dec_start (GstAudioDecoder * dec);
static gboolean gst_mpg123_audio_dec_stop (GstAudioDecoder * dec);
static GstFlowReturn gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec
    * mpg123_decoder, unsigned char const *decoded_bytes,
    size_t const num_decoded_bytes);
static GstFlowReturn gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec,
    GstBuffer * input_buffer);
static gboolean gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec,
    GstCaps * input_caps);
static void gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard);

G_DEFINE_TYPE (GstMpg123AudioDec, gst_mpg123_audio_dec, GST_TYPE_AUDIO_DECODER);

static void
gst_mpg123_audio_dec_class_init (GstMpg123AudioDecClass * klass)
{
  GstAudioDecoderClass *base_class;
  GstElementClass *element_class;
  GstPadTemplate *src_template, *sink_template;
  int error;

  GST_DEBUG_CATEGORY_INIT (mpg123_debug, "mpg123", 0, "mpg123 mp3 decoder");

  base_class = GST_AUDIO_DECODER_CLASS (klass);
  element_class = GST_ELEMENT_CLASS (klass);

  gst_element_class_set_static_metadata (element_class,
      "mpg123 mp3 decoder",
      "Codec/Decoder/Audio",
      "Decodes mp3 streams using the mpg123 library",
      "Carlos Rafael Giani <dv@pseudoterminal.org>");

  /* Not using static pad template for srccaps, since the comma-separated list
   * of formats needs to be created depending on whatever mpg123 supports */
  {
    const int *format_list;
    const long *rates_list;
    size_t num, i;
    GString *s;
    GstCaps *src_template_caps;

    s = g_string_new ("audio/x-raw, ");

    mpg123_encodings (&format_list, &num);
    g_string_append (s, "format = { ");
    for (i = 0; i < num; ++i) {
      switch (format_list[i]) {
        case MPG123_ENC_SIGNED_16:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (S16));
          break;
        case MPG123_ENC_UNSIGNED_16:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (U16));
          break;
        case MPG123_ENC_SIGNED_24:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (S24));
          break;
        case MPG123_ENC_UNSIGNED_24:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (U24));
          break;
        case MPG123_ENC_SIGNED_32:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (S32));
          break;
        case MPG123_ENC_UNSIGNED_32:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (U32));
          break;
        case MPG123_ENC_FLOAT_32:
          g_string_append (s, (i > 0) ? ", " : "");
          g_string_append (s, GST_AUDIO_NE (F32));
          break;
        default:
          GST_DEBUG ("Ignoring mpg123 format %d", format_list[i]);
          break;
      }
    }
    g_string_append (s, " }, ");

    mpg123_rates (&rates_list, &num);
    g_string_append (s, "rate = (int) { ");
    for (i = 0; i < num; ++i) {
      g_string_append_printf (s, "%s%lu", (i > 0) ? ", " : "", rates_list[i]);
    }
    g_string_append (s, "}, ");

    g_string_append (s, "channels = (int) [ 1, 2 ], ");
    g_string_append (s, "layout = (string) interleaved");

    src_template_caps = gst_caps_from_string (s->str);
    src_template = gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
        src_template_caps);

    g_string_free (s, TRUE);
  }

  sink_template = gst_static_pad_template_get (&static_sink_template);

  gst_element_class_add_pad_template (element_class, sink_template);
  gst_element_class_add_pad_template (element_class, src_template);

  base_class->start = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_start);
  base_class->stop = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_stop);
  base_class->handle_frame =
      GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_handle_frame);
  base_class->set_format = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_set_format);
  base_class->flush = GST_DEBUG_FUNCPTR (gst_mpg123_audio_dec_flush);

  error = mpg123_init ();
  if (G_UNLIKELY (error != MPG123_OK))
    GST_ERROR ("Could not initialize mpg123 library: %s",
        mpg123_plain_strerror (error));
  else
    GST_INFO ("mpg123 library initialized");
}


void
gst_mpg123_audio_dec_init (GstMpg123AudioDec * mpg123_decoder)
{
  mpg123_decoder->handle = NULL;
}


static gboolean
gst_mpg123_audio_dec_start (GstAudioDecoder * dec)
{
  GstMpg123AudioDec *mpg123_decoder;
  int error;

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);
  error = 0;

  mpg123_decoder->handle = mpg123_new (NULL, &error);
  mpg123_decoder->has_next_audioinfo = FALSE;
  mpg123_decoder->frame_offset = 0;

  /* Initially, the mpg123 handle comes with a set of default formats
   * supported. This clears this set.  This is necessary, since only one
   * format shall be supported (see set_format for more). */
  mpg123_format_none (mpg123_decoder->handle);

  /* Built-in mpg123 support for gapless decoding is disabled for now,
   * since it does not work well with seeking */
  mpg123_param (mpg123_decoder->handle, MPG123_REMOVE_FLAGS, MPG123_GAPLESS, 0);
  /* Tells mpg123 to use a small read-ahead buffer for better MPEG sync;
   * essential for MP3 radio streams */
  mpg123_param (mpg123_decoder->handle, MPG123_ADD_FLAGS, MPG123_SEEKBUFFER, 0);
  /* Sets the resync limit to the end of the stream (otherwise mpg123 may give
   * up on decoding prematurely, especially with mp3 web radios) */
  mpg123_param (mpg123_decoder->handle, MPG123_RESYNC_LIMIT, -1, 0);
#if MPG123_API_VERSION >= 36
  /* The precise API version where MPG123_AUTO_RESAMPLE appeared is
   * somewhere between 29 and 36 */
  /* Don't let mpg123 resample output */
  mpg123_param (mpg123_decoder->handle, MPG123_REMOVE_FLAGS,
      MPG123_AUTO_RESAMPLE, 0);
#endif
  /* Don't let mpg123 print messages to stdout/stderr */
  mpg123_param (mpg123_decoder->handle, MPG123_ADD_FLAGS, MPG123_QUIET, 0);

  /* Open in feed mode (= encoded data is fed manually into the handle). */
  error = mpg123_open_feed (mpg123_decoder->handle);

  if (G_UNLIKELY (error != MPG123_OK)) {
    GST_ELEMENT_ERROR (dec, LIBRARY, INIT, (NULL),
        ("%s", mpg123_strerror (mpg123_decoder->handle)));
    mpg123_close (mpg123_decoder->handle);
    mpg123_delete (mpg123_decoder->handle);
    mpg123_decoder->handle = NULL;
    return FALSE;
  }

  GST_INFO_OBJECT (dec, "mpg123 decoder started");

  return TRUE;
}


static gboolean
gst_mpg123_audio_dec_stop (GstAudioDecoder * dec)
{
  GstMpg123AudioDec *mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  if (G_LIKELY (mpg123_decoder->handle != NULL)) {
    mpg123_close (mpg123_decoder->handle);
    mpg123_delete (mpg123_decoder->handle);
    mpg123_decoder->handle = NULL;
  }

  GST_INFO_OBJECT (dec, "mpg123 decoder stopped");

  return TRUE;
}


static GstFlowReturn
gst_mpg123_audio_dec_push_decoded_bytes (GstMpg123AudioDec * mpg123_decoder,
    unsigned char const *decoded_bytes, size_t const num_decoded_bytes)
{
  GstBuffer *output_buffer;
  GstAudioDecoder *dec;

  output_buffer = NULL;
  dec = GST_AUDIO_DECODER (mpg123_decoder);

  if ((num_decoded_bytes == 0) || (decoded_bytes == NULL)) {
    /* This occurs in the first few frames, which do not carry data; once
     * MPG123_AUDIO_DEC_NEW_FORMAT is received, the empty frames stop occurring */
    GST_DEBUG_OBJECT (mpg123_decoder,
        "cannot decode yet, need more data -> no output buffer to push");
    return GST_FLOW_OK;
  }

  output_buffer = gst_buffer_new_allocate (NULL, num_decoded_bytes, NULL);

  if (output_buffer == NULL) {
    /* This is necessary to advance playback in time,
     * even when nothing was decoded. */
    return gst_audio_decoder_finish_frame (dec, NULL, 1);
  } else {
    GstMapInfo info;

    if (gst_buffer_map (output_buffer, &info, GST_MAP_WRITE)) {
      memcpy (info.data, decoded_bytes, num_decoded_bytes);
      gst_buffer_unmap (output_buffer, &info);
    } else {
      GST_ERROR_OBJECT (mpg123_decoder, "gst_buffer_map() returned NULL");
      gst_buffer_unref (output_buffer);
      output_buffer = NULL;
    }

    return gst_audio_decoder_finish_frame (dec, output_buffer, 1);
  }
}


static GstFlowReturn
gst_mpg123_audio_dec_handle_frame (GstAudioDecoder * dec,
    GstBuffer * input_buffer)
{
  GstMpg123AudioDec *mpg123_decoder;
  int decode_error;
  unsigned char *decoded_bytes;
  size_t num_decoded_bytes;
  GstFlowReturn retval;

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  g_assert (mpg123_decoder->handle != NULL);

  /* The actual decoding */
  {
    /* feed input data (if there is any) */
    if (G_LIKELY (input_buffer != NULL)) {
      GstMapInfo info;

      if (gst_buffer_map (input_buffer, &info, GST_MAP_READ)) {
        mpg123_feed (mpg123_decoder->handle, info.data, info.size);
        gst_buffer_unmap (input_buffer, &info);
      } else {
        GST_ERROR_OBJECT (mpg123_decoder, "gst_memory_map() failed");
        return GST_FLOW_ERROR;
      }
    }

    /* Try to decode a frame */
    decoded_bytes = NULL;
    num_decoded_bytes = 0;
    decode_error = mpg123_decode_frame (mpg123_decoder->handle,
        &mpg123_decoder->frame_offset, &decoded_bytes, &num_decoded_bytes);
  }

  retval = GST_FLOW_OK;

  switch (decode_error) {
    case MPG123_NEW_FORMAT:
      /* As mentioned in gst_mpg123_audio_dec_set_format(), the next audioinfo
       * is not set immediately; instead, the code waits for mpg123 to take
       * note of the new format, and then sets the audioinfo. This fixes glitches
       * with mp3s containing several format headers (for example, first half
       * using 44.1kHz, second half 32 kHz) */

      GST_LOG_OBJECT (dec,
          "mpg123 reported a new format -> setting next srccaps");

      gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
          num_decoded_bytes);

      /* If there is a next audioinfo, use it, then set has_next_audioinfo to
       * FALSE, to make sure gst_audio_decoder_set_output_format() isn't called
       * again until set_format is called by the base class */
      if (mpg123_decoder->has_next_audioinfo) {
        if (!gst_audio_decoder_set_output_format (dec,
                &(mpg123_decoder->next_audioinfo))) {
          GST_WARNING_OBJECT (dec, "Unable to set output format");
          retval = GST_FLOW_NOT_NEGOTIATED;
        }
        mpg123_decoder->has_next_audioinfo = FALSE;
      }

      break;

    case MPG123_NEED_MORE:
    case MPG123_OK:
      retval = gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder,
          decoded_bytes, num_decoded_bytes);
      break;

    case MPG123_DONE:
      /* If this happens, then the upstream parser somehow missed the ending
       * of the bitstream */
      GST_LOG_OBJECT (dec, "mpg123 is done decoding");
      gst_mpg123_audio_dec_push_decoded_bytes (mpg123_decoder, decoded_bytes,
          num_decoded_bytes);
      retval = GST_FLOW_EOS;
      break;

    default:
    {
      /* Anything else is considered an error */
      int errcode;
      switch (decode_error) {
        case MPG123_ERR:
          errcode = mpg123_errcode (mpg123_decoder->handle);
          break;
        default:
          errcode = decode_error;
      }
      switch (errcode) {
        case MPG123_BAD_OUTFORMAT:{
          GstCaps *input_caps =
              gst_pad_get_current_caps (GST_AUDIO_DECODER_SINK_PAD (dec));
          GST_ELEMENT_ERROR (dec, STREAM, FORMAT, (NULL),
              ("Output sample format could not be used when trying to decode frame. "
                  "This is typically caused when the input caps (often the sample "
                  "rate) do not match the actual format of the audio data. "
                  "Input caps: %" GST_PTR_FORMAT, input_caps)
              );
          gst_caps_unref (input_caps);
          break;
        }
        default:{
          char const *errmsg = mpg123_plain_strerror (errcode);
          GST_ERROR_OBJECT (dec, "Reported error: %s", errmsg);
        }
      }

      retval = GST_FLOW_ERROR;
    }
  }

  return retval;
}


static gboolean
gst_mpg123_audio_dec_set_format (GstAudioDecoder * dec, GstCaps * input_caps)
{
/* Using the parsed information upstream, and the list of allowed caps
 * downstream, this code tries to find a suitable audio info. It is important
 * to keep in mind that the rate and number of channels should never deviate
 * from the one the bitstream has, otherwise mpg123 has to mix channels and/or
 * resample (and as its docs say, its internal resampler is very crude). The
 * sample format, however, can be chosen freely, because the MPEG specs do not
 * mandate any special format. Therefore, rate and number of channels are taken
 * from upstream (which parsed the MPEG frames, so the input_caps contain
 * exactly the rate and number of channels the bitstream actually has), while
 * the sample format is chosen by trying out all caps that are allowed by
 * downstream. This way, the output is adjusted to what the downstream prefers.
 *
 * Also, the new output audio info is not set immediately. Instead, it is
 * considered the "next audioinfo". The code waits for mpg123 to notice the new
 * format (= when mpg123_decode_frame() returns MPG123_AUDIO_DEC_NEW_FORMAT),
 * and then sets the next audioinfo. Otherwise, the next audioinfo is set too
 * soon, which may cause problems with mp3s containing several format headers.
 * One example would be an mp3 with the first 30 seconds using 44.1 kHz, then
 * the next 30 seconds using 32 kHz. Rare, but possible.
 *
 * STEPS:
 *
 * 1. get rate and channels from input_caps
 * 2. get allowed caps from src pad
 * 3. for each structure in allowed caps:
 * 3.1. take format
 * 3.2. if the combination of format with rate and channels is unsupported by
 *      mpg123, go to (3), or exit with error if there are no more structures
 *      to try
 * 3.3. create next audioinfo out of rate,channels,format, and exit
 */


  int rate, channels;
  GstMpg123AudioDec *mpg123_decoder;
  GstCaps *allowed_srccaps;
  guint structure_nr;
  gboolean match_found = FALSE;

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  g_assert (mpg123_decoder->handle != NULL);

  mpg123_decoder->has_next_audioinfo = FALSE;

  /* Get rate and channels from input_caps */
  {
    GstStructure *structure;
    gboolean err = FALSE;

    /* Only the first structure is used (multiple
     * input caps structures don't make sense */
    structure = gst_caps_get_structure (input_caps, 0);

    if (!gst_structure_get_int (structure, "rate", &rate)) {
      err = TRUE;
      GST_ERROR_OBJECT (dec, "Input caps do not have a rate value");
    }
    if (!gst_structure_get_int (structure, "channels", &channels)) {
      err = TRUE;
      GST_ERROR_OBJECT (dec, "Input caps do not have a channel value");
    }

    if (err)
      return FALSE;
  }

  /* Get the caps that are allowed by downstream */
  {
    GstCaps *allowed_srccaps_unnorm =
        gst_pad_get_allowed_caps (GST_AUDIO_DECODER_SRC_PAD (dec));
    allowed_srccaps = gst_caps_normalize (allowed_srccaps_unnorm);
  }

  /* Go through all allowed caps, pick the first one that matches */
  for (structure_nr = 0; structure_nr < gst_caps_get_size (allowed_srccaps);
      ++structure_nr) {
    GstStructure *structure;
    gchar const *format_str;
    GstAudioFormat format;
    int encoding;

    structure = gst_caps_get_structure (allowed_srccaps, structure_nr);

    format_str = gst_structure_get_string (structure, "format");
    if (format_str == NULL) {
      GST_DEBUG_OBJECT (dec, "Could not get format from src caps");
      continue;
    }

    format = gst_audio_format_from_string (format_str);
    if (format == GST_AUDIO_FORMAT_UNKNOWN) {
      GST_DEBUG_OBJECT (dec, "Unknown format %s", format_str);
      continue;
    }

    switch (format) {
      case GST_AUDIO_FORMAT_S16:
        encoding = MPG123_ENC_SIGNED_16;
        break;
      case GST_AUDIO_FORMAT_S24:
        encoding = MPG123_ENC_SIGNED_24;
        break;
      case GST_AUDIO_FORMAT_S32:
        encoding = MPG123_ENC_SIGNED_32;
        break;
      case GST_AUDIO_FORMAT_U16:
        encoding = MPG123_ENC_UNSIGNED_16;
        break;
      case GST_AUDIO_FORMAT_U24:
        encoding = MPG123_ENC_UNSIGNED_24;
        break;
      case GST_AUDIO_FORMAT_U32:
        encoding = MPG123_ENC_UNSIGNED_32;
        break;
      case GST_AUDIO_FORMAT_F32:
        encoding = MPG123_ENC_FLOAT_32;
        break;
      default:
        GST_DEBUG_OBJECT (dec,
            "Format %s in srccaps is not supported", format_str);
        continue;
    }

    {
      int err;

      /* Cleanup old formats & set new one */
      mpg123_format_none (mpg123_decoder->handle);
      err = mpg123_format (mpg123_decoder->handle, rate, channels, encoding);
      if (err != MPG123_OK) {
        GST_DEBUG_OBJECT (dec,
            "mpg123 cannot use caps %" GST_PTR_FORMAT
            " because mpg123_format() failed: %s", structure,
            mpg123_strerror (mpg123_decoder->handle));
        continue;
      }
    }

    gst_audio_info_init (&(mpg123_decoder->next_audioinfo));
    gst_audio_info_set_format (&(mpg123_decoder->next_audioinfo), format, rate,
        channels, NULL);
    GST_LOG_OBJECT (dec, "The next audio format is: %s, %u Hz, %u channels",
        format_str, rate, channels);
    mpg123_decoder->has_next_audioinfo = TRUE;

    match_found = TRUE;

    break;
  }

  gst_caps_unref (allowed_srccaps);

  return match_found;
}


static void
gst_mpg123_audio_dec_flush (GstAudioDecoder * dec, gboolean hard)
{
  int error;
  GstMpg123AudioDec *mpg123_decoder;

  GST_LOG_OBJECT (dec, "Flushing decoder");

  mpg123_decoder = GST_MPG123_AUDIO_DEC (dec);

  g_assert (mpg123_decoder->handle != NULL);

  /* Flush by reopening the feed */
  mpg123_close (mpg123_decoder->handle);
  error = mpg123_open_feed (mpg123_decoder->handle);

  if (G_UNLIKELY (error != MPG123_OK)) {
    GST_ELEMENT_ERROR (dec, LIBRARY, INIT, (NULL),
        ("Error while reopening mpg123 feed: %s",
            mpg123_plain_strerror (error)));
    mpg123_close (mpg123_decoder->handle);
    mpg123_delete (mpg123_decoder->handle);
    mpg123_decoder->handle = NULL;
  }

  mpg123_decoder->has_next_audioinfo = FALSE;

  /* opening/closing feeds do not affect the format defined by the
   * mpg123_format() call that was made in gst_mpg123_audio_dec_set_format(),
   * and since the up/downstream caps are not expected to change here, no
   * mpg123_format() calls are done */
}

static gboolean
plugin_init (GstPlugin * plugin)
{
  return gst_element_register (plugin, "mpg123audiodec",
      GST_RANK_MARGINAL, gst_mpg123_audio_dec_get_type ());
}

GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
    GST_VERSION_MINOR,
    mpg123, "mp3 decoding based on the mpg123 library",
    plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)