mediafoundation: Add support for AAC decoding

See also https://docs.microsoft.com/en-us/windows/win32/medfound/aac-decoder

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1596>
2025-01-02 21:48:55 +00:00 · 2022-01-27 02:20:37 +09:00 · 2022-01-27 02:20:37 +09:00 · be957f6c61
commit be957f6c61
parent 0b26254a6a
8 changed files with 966 additions and 2 deletions
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaacdec.cpp
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaacdec.cpp
@ -0,0 +1,380 @@
 /* GStreamer
 * Copyright (C) 2022 Seungha Yang <seungha@centricular.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
 /**
 * SECTION:element-mfaacdec
 * @title: mfaacdec
 *
 * This element decodes AAC compressed data into RAW audio data.
 *
 * Since: 1.22
 *
 */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 #include <gst/gst.h>
 #include <gst/pbutils/pbutils.h>
 #include "gstmfaudiodecoder.h"
 #include "gstmfaacdec.h"
 #include <wrl.h>
 #include <string.h>
 /* *INDENT-OFF* */
 using namespace Microsoft::WRL;
 /* *INDENT-ON* */
 /* Debug category behind the GST_* logging macros of this file; it is
  * initialized in gst_mf_aac_dec_plugin_init(). */
 GST_DEBUG_CATEGORY (gst_mf_aac_dec_debug);
 #define GST_CAT_DEFAULT gst_mf_aac_dec_debug
 /* Sink pad: framed MPEG-2/MPEG-4 AAC with "raw" stream-format,
  * 1 to 6 channels, 8 kHz to 48 kHz. */
 static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/mpeg, "
        "mpegversion = (int) {2, 4}, "
        "stream-format = (string) raw, framed = (boolean) true, "
        "channels = (int) [1, 6], rate = (int) [8000, 48000]")
    );
 /* Source pad: interleaved native-endian signed 16-bit raw audio with the
  * same channel and rate ranges as the sink pad. */
 static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS ("audio/x-raw, "
        "format = (string) " GST_AUDIO_NE (S16) ", "
        "layout = (string) interleaved, "
        "channels = (int) [1, 6], rate = (int) [8000, 48000]")
    );
 /* Instance structure: adds no state on top of GstMFAudioDecoder. */
 typedef struct _GstMFAacDec
 {
  GstMFAudioDecoder parent;
 } GstMFAacDec;
 /* Class structure: adds no members on top of GstMFAudioDecoderClass. */
 typedef struct _GstMFAacDecClass
 {
  GstMFAudioDecoderClass parent_class;
 } GstMFAacDecClass;
 /* Parent class pointer, assigned in gst_mf_aac_dec_class_init(). */
 static GTypeClass *parent_class = nullptr;
 /* Forward declaration so class_init can install it as the set_format vfunc. */
 static gboolean gst_mf_aac_dec_set_format (GstMFAudioDecoder * decoder,
    GstMFTransform * transform, GstCaps * caps);
 /* Class initializer for the dynamically registered AAC decoder type.
  *
  * @klass: the class being initialized.
  * @data: a GstMFAudioDecoderClassData allocated by the registration code;
  *   ownership is taken here and the data is freed before returning.
  */
 static void
 gst_mf_aac_dec_class_init (GstMFAacDecClass * klass, gpointer data)
 {
  GstMFAudioDecoderClassData *class_data = (GstMFAudioDecoderClassData *) data;
  GstMFAudioDecoderClass *mf_class = GST_MF_AUDIO_DECODER_CLASS (klass);
  GstElementClass *gstelement_class = GST_ELEMENT_CLASS (klass);
  gchar *title;

  parent_class = (GTypeClass *) g_type_class_peek_parent (klass);

  /* The element's long name embeds the name of the MFT it wraps. */
  title = g_strdup_printf ("Media Foundation %s", class_data->device_name);
  gst_element_class_set_metadata (gstelement_class, title,
      "Codec/Decoder/Audio",
      "Microsoft Media Foundation AAC Decoder",
      "Seungha Yang <seungha@centricular.com>");
  g_free (title);

  gst_element_class_add_static_pad_template (gstelement_class,
      &sink_template);
  gst_element_class_add_static_pad_template (gstelement_class, &src_template);

  /* Values the base class reads when it creates and configures the MFT. */
  mf_class->set_format = GST_DEBUG_FUNCPTR (gst_mf_aac_dec_set_format);
  mf_class->codec_id = MFAudioFormat_AAC;
  mf_class->enum_flags = class_data->enum_flags;
  mf_class->device_index = class_data->device_index;

  /* The class data is only needed during class initialization. */
  g_free (class_data->device_name);
  g_free (class_data);
 }
 /* Instance initializer: intentionally empty, GstMFAacDec has no state of
  * its own. */
 static void
 gst_mf_aac_dec_init (GstMFAacDec * self)
 {
 }
 /* Portion of the HEAACWAVEINFO struct that follows its wfx field,
  * plus 2 bytes of AudioSpecificConfig().
  *
  * With the Windows definitions of WORD (2 bytes) and DWORD (4 bytes) the
  * five leading fields occupy 12 bytes, so AudioSpecificConfig covers bytes
  * 12..13. The compiler may pad the struct beyond those 14 bytes. */
 typedef struct
 {
  WORD wPayloadType;
  WORD wAudioProfileLevelIndication;
  WORD wStructType;
  WORD wReserved1;
  DWORD dwReserved2;
  /* First two bytes of the stream's codec_data, copied verbatim */
  WORD AudioSpecificConfig;
 } AACWaveInfo;
 /* Configures @transform for the AAC stream described by @caps and selects
  * 16-bit PCM as the decoder output.
  *
  * @decoder: the decoder instance.
  * @transform: the already opened MFT to configure.
  * @caps: sink caps; they must carry a "codec_data" buffer of at least two
  *   bytes, which is used as AudioSpecificConfig().
  *
  * Channel count and sample rate are parsed from codec_data, falling back to
  * the values found in @caps when parsing yields 0.
  *
  * Returns: TRUE when the input and output types were set on @transform and
  * the output format was accepted by the audio decoder base class, FALSE
  * otherwise.
  */
 static gboolean
 gst_mf_aac_dec_set_format (GstMFAudioDecoder * decoder,
    GstMFTransform * transform, GstCaps * caps)
 {
  GstMFAacDec *self = (GstMFAacDec *) decoder;
  HRESULT hr;
  const GValue *value;
  GstStructure *structure;
  GstBuffer *codec_data;
  ComPtr < IMFMediaType > in_type;
  ComPtr < IMFMediaType > out_type;
  AACWaveInfo wave_info;
  GstMapInfo map_info;
  guint channels, rate;
  const guint8 *data;
  GstAudioInfo in_audio_info, out_audio_info;
  GList *output_list, *iter;
  GstCaps *out_caps;
  /* MF_MT_USER_DATA is the 12 byte tail of HEAACWAVEINFO immediately
   * followed by the 2 bytes of AudioSpecificConfig() */
  const UINT32 user_data_size = 14;

  G_STATIC_ASSERT (sizeof (AACWaveInfo) >= 14);

  if (!gst_audio_info_from_caps (&in_audio_info, caps)) {
    GST_ERROR_OBJECT (self, "Failed to get audio info from caps");
    return FALSE;
  }

  structure = gst_caps_get_structure (caps, 0);
  value = gst_structure_get_value (structure, "codec_data");
  if (!value) {
    GST_ERROR_OBJECT (self, "Missing codec_data");
    return FALSE;
  }

  codec_data = gst_value_get_buffer (value);
  if (!codec_data || gst_buffer_get_size (codec_data) < 2) {
    GST_ERROR_OBJECT (self, "Invalid codec_data");
    return FALSE;
  }

  if (!gst_buffer_map (codec_data, &map_info, GST_MAP_READ)) {
    GST_ERROR_OBJECT (self, "Invalid codec_data buffer");
    return FALSE;
  }

  data = (guint8 *) map_info.data;
  channels = gst_codec_utils_aac_get_channels (data, map_info.size);
  rate = gst_codec_utils_aac_get_sample_rate (data, map_info.size);

  /* Fallback to channels/rate values specified in caps */
  if (channels == 0)
    channels = in_audio_info.channels;
  if (rate == 0)
    rate = in_audio_info.rate;

  memset (&wave_info, 0, sizeof (AACWaveInfo));
  wave_info.wAudioProfileLevelIndication = 0xfe;
  memcpy (&wave_info.AudioSpecificConfig, data, 2);

  /* Everything needed from codec_data has been copied; release the mapping
   * here so that none of the return paths below can leak it */
  gst_buffer_unmap (codec_data, &map_info);

  hr = MFCreateMediaType (&in_type);
  if (!gst_mf_result (hr))
    return FALSE;

  hr = in_type->SetGUID (MF_MT_MAJOR_TYPE, MFMediaType_Audio);
  if (!gst_mf_result (hr))
    return FALSE;

  hr = in_type->SetGUID (MF_MT_SUBTYPE, MFAudioFormat_AAC);
  if (!gst_mf_result (hr))
    return FALSE;

  hr = in_type->SetUINT32 (MF_MT_AAC_PAYLOAD_TYPE, 0);
  if (!gst_mf_result (hr))
    return FALSE;

  hr = in_type->SetUINT32 (MF_MT_AUDIO_NUM_CHANNELS, channels);
  if (!gst_mf_result (hr))
    return FALSE;

  hr = in_type->SetUINT32 (MF_MT_AUDIO_SAMPLES_PER_SECOND, rate);
  if (!gst_mf_result (hr))
    return FALSE;

  /* FIXME: should parse this somehow? */
  hr = in_type->SetUINT32 (MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION, 0xfe);
  if (!gst_mf_result (hr))
    return FALSE;

  /* The blob must include the AudioSpecificConfig bytes that follow the
   * HEAACWAVEINFO tail, otherwise the decoder never sees them */
  hr = in_type->SetBlob (MF_MT_USER_DATA, (UINT8 *) & wave_info,
      user_data_size);
  if (!gst_mf_result (hr))
    return FALSE;

  if (!gst_mf_transform_set_input_type (transform, in_type.Get ())) {
    GST_ERROR_OBJECT (self, "Failed to set format");
    return FALSE;
  }

  if (!gst_mf_transform_get_output_available_types (transform, &output_list)) {
    GST_ERROR_OBJECT (self, "Failed to get output types");
    return FALSE;
  }

  /* Pick the first audio/PCM output type with 16 bits per sample, matching
   * the S16 format advertised on the source pad template */
  for (iter = output_list; iter; iter = g_list_next (iter)) {
    GUID guid;
    IMFMediaType *type = (IMFMediaType *) iter->data;
    UINT32 bps;

    hr = type->GetGUID (MF_MT_MAJOR_TYPE, &guid);
    if (!gst_mf_result (hr))
      continue;

    if (!IsEqualGUID (guid, MFMediaType_Audio))
      continue;

    hr = type->GetGUID (MF_MT_SUBTYPE, &guid);
    if (!gst_mf_result (hr))
      continue;

    if (!IsEqualGUID (guid, MFAudioFormat_PCM))
      continue;

    hr = type->GetUINT32 (MF_MT_AUDIO_BITS_PER_SAMPLE, &bps);
    if (!gst_mf_result (hr))
      continue;

    if (bps != 16)
      continue;

    /* ComPtr assignment takes its own reference, so the type outlives the
     * list that is released below */
    out_type = type;
    break;
  }

  g_list_free_full (output_list, (GDestroyNotify) gst_mf_media_type_release);

  if (!out_type) {
    GST_ERROR_OBJECT (self, "Failed to select output type");
    return FALSE;
  }

  if (!gst_mf_transform_set_output_type (transform, out_type.Get ())) {
    GST_ERROR_OBJECT (self, "Failed to select output type");
    return FALSE;
  }

  out_caps = gst_mf_media_type_to_caps (out_type.Get ());
  if (!out_caps) {
    GST_ERROR_OBJECT (self, "Failed to get output caps");
    return FALSE;
  }

  GST_DEBUG_OBJECT (self, "Output caps %" GST_PTR_FORMAT, out_caps);

  if (!gst_audio_info_from_caps (&out_audio_info, out_caps)) {
    GST_ERROR_OBJECT (self,
        "Failed to convert caps to audio info %" GST_PTR_FORMAT, out_caps);
    gst_caps_unref (out_caps);
    /* Bail out: out_audio_info is not valid and the caps reference has
     * already been dropped */
    return FALSE;
  }

  gst_caps_unref (out_caps);

  return gst_audio_decoder_set_output_format (GST_AUDIO_DECODER (self),
      &out_audio_info);
 }
 /* Registers the "GstMFAacDec" type and the "mfaacdec" element.
  *
  * @plugin: plugin the element is registered with.
  * @rank: rank of the element.
  * @device_name: MFT name; a copy is handed to class_init as class data.
  * @enum_flags: MFT enumeration flags stored in the class data.
  * @device_index: MFT enumeration index stored in the class data.
  */
 static void
 gst_mf_aac_dec_register (GstPlugin * plugin, guint rank,
    const gchar * device_name, guint32 enum_flags, guint device_index)
 {
  GstMFAudioDecoderClassData *class_data;
  GTypeInfo info = { 0, };
  GType dec_type;

  /* Released by gst_mf_aac_dec_class_init() once the class is set up */
  class_data = g_new0 (GstMFAudioDecoderClassData, 1);
  class_data->device_name = g_strdup (device_name);
  class_data->enum_flags = enum_flags;
  class_data->device_index = device_index;

  /* Every GTypeInfo member not assigned here stays zero / null */
  info.class_size = sizeof (GstMFAacDecClass);
  info.class_init = (GClassInitFunc) gst_mf_aac_dec_class_init;
  info.class_data = class_data;
  info.instance_size = sizeof (GstMFAacDec);
  info.instance_init = (GInstanceInitFunc) gst_mf_aac_dec_init;

  dec_type = g_type_register_static (GST_TYPE_MF_AUDIO_DECODER, "GstMFAacDec",
      &info, (GTypeFlags) 0);

  if (!gst_element_register (plugin, "mfaacdec", rank, dec_type))
    GST_WARNING ("Failed to register plugin");
 }
 /* Opens @transform, reads its "device-name" property and registers the
  * element for it.
  *
  * Returns: TRUE if the element registration was attempted, FALSE when the
  * transform could not be opened or reports no device name.
  */
 static gboolean
 gst_mf_aac_dec_plugin_init_internal (GstPlugin * plugin, guint rank,
    GstMFTransform * transform, guint device_index, guint32 enum_flags)
 {
  gchar *name = nullptr;
  gboolean done = FALSE;

  if (gst_mf_transform_open (transform)) {
    g_object_get (transform, "device-name", &name, nullptr);

    if (name) {
      gst_mf_aac_dec_register (plugin, rank, name, enum_flags, device_index);
      g_free (name);
      done = TRUE;
    } else {
      GST_WARNING_OBJECT (transform, "Unknown device name");
    }
  }

  return done;
 }
 /* Plugin entry point for the AAC decoder.
  *
  * Walks the synchronous audio decoder MFTs that accept AAC input, in
  * enumeration order, and stops at the first one for which registration
  * succeeds or as soon as no further MFT can be created.
  */
 void
 gst_mf_aac_dec_plugin_init (GstPlugin * plugin, guint rank)
 {
  GstMFTransformEnumParams enum_params = { 0, };
  MFT_REGISTER_TYPE_INFO input_type;
  GstMFTransform *transform;
  gint index;

  GST_DEBUG_CATEGORY_INIT (gst_mf_aac_dec_debug, "mfaacdec", 0, "mfaacdec");

  input_type.guidMajorType = MFMediaType_Audio;
  input_type.guidSubtype = MFAudioFormat_AAC;

  enum_params.category = MFT_CATEGORY_AUDIO_DECODER;
  enum_params.enum_flags = (MFT_ENUM_FLAG_SYNCMFT |
      MFT_ENUM_FLAG_SORTANDFILTER | MFT_ENUM_FLAG_SORTANDFILTER_APPROVED_ONLY);
  enum_params.input_typeinfo = &input_type;

  for (index = 0;; index++) {
    gboolean registered;

    enum_params.device_index = index;
    transform = gst_mf_transform_new (&enum_params);
    /* No MFT at this index: nothing more to try */
    if (!transform)
      break;

    registered = gst_mf_aac_dec_plugin_init_internal (plugin, rank, transform,
        enum_params.device_index, enum_params.enum_flags);
    gst_clear_object (&transform);

    /* One registered element is enough */
    if (registered)
      break;
  }
 }
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaacdec.h
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaacdec.h
@ -0,0 +1,30 @@
 /* GStreamer
 * Copyright (C) 2022 Seungha Yang <seungha@centricular.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
 #pragma once
 #include <gst/gst.h>
 G_BEGIN_DECLS
 /* Looks for a Media Foundation AAC decoder transform and, when one can be
  * opened, registers the "mfaacdec" element on @plugin with rank @rank. */
 void gst_mf_aac_dec_plugin_init (GstPlugin * plugin,
                                 guint rank);
 G_END_DECLS
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaudiodecoder.cpp
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaudiodecoder.cpp
@ -0,0 +1,285 @@
 /* GStreamer
 * Copyright (C) 2022 Seungha Yang <seungha@centricular.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 #include <gst/gst.h>
 #include "gstmfaudiodecoder.h"
 #include <wrl.h>
 #include <string.h>
 /* *INDENT-OFF* */
 using namespace Microsoft::WRL;
 /* *INDENT-ON* */
 /* Debug category for the base class; it is initialized as part of the type
  * definition below. */
 GST_DEBUG_CATEGORY (gst_mf_audio_decoder_debug);
 #define GST_CAT_DEFAULT gst_mf_audio_decoder_debug
 #define gst_mf_audio_decoder_parent_class parent_class
 /* Abstract base type deriving from GstAudioDecoder. */
 G_DEFINE_ABSTRACT_TYPE_WITH_CODE (GstMFAudioDecoder, gst_mf_audio_decoder,
    GST_TYPE_AUDIO_DECODER,
    GST_DEBUG_CATEGORY_INIT (gst_mf_audio_decoder_debug, "mfaudiodecoder", 0,
        "mfaudiodecoder"));
 /* GstAudioDecoder virtual methods implemented in this file, plus the drain
  * helper used by set_format and handle_frame. */
 static gboolean gst_mf_audio_decoder_open (GstAudioDecoder * dec);
 static gboolean gst_mf_audio_decoder_close (GstAudioDecoder * dec);
 static gboolean gst_mf_audio_decoder_set_format (GstAudioDecoder * dec,
    GstCaps * caps);
 static GstFlowReturn gst_mf_audio_decoder_handle_frame (GstAudioDecoder * dec,
    GstBuffer * buffer);
 static GstFlowReturn gst_mf_audio_decoder_drain (GstAudioDecoder * dec);
 static void gst_mf_audio_decoder_flush (GstAudioDecoder * dec, gboolean hard);
 /* Class initializer: installs the GstAudioDecoder virtual methods and marks
  * the base type as plugin API. */
 static void
 gst_mf_audio_decoder_class_init (GstMFAudioDecoderClass * klass)
 {
  GstAudioDecoderClass *base_class = GST_AUDIO_DECODER_CLASS (klass);

  /* MFT lifetime */
  base_class->open = GST_DEBUG_FUNCPTR (gst_mf_audio_decoder_open);
  base_class->close = GST_DEBUG_FUNCPTR (gst_mf_audio_decoder_close);

  /* Stream handling */
  base_class->set_format = GST_DEBUG_FUNCPTR (gst_mf_audio_decoder_set_format);
  base_class->handle_frame =
      GST_DEBUG_FUNCPTR (gst_mf_audio_decoder_handle_frame);
  base_class->flush = GST_DEBUG_FUNCPTR (gst_mf_audio_decoder_flush);

  gst_type_mark_as_plugin_api (GST_TYPE_MF_AUDIO_DECODER,
      (GstPluginAPIFlags) 0);
 }
 /* Instance initializer: marks the decoder as drainable. */
 static void
 gst_mf_audio_decoder_init (GstMFAudioDecoder * self)
 {
  GstAudioDecoder *base = GST_AUDIO_DECODER (self);

  gst_audio_decoder_set_drainable (base, TRUE);
 }
 /* GstAudioDecoder::open implementation.
  *
  * Creates the MFT selected by the subclass' codec_id, enum_flags and
  * device_index class members and stores it in self->transform.
  *
  * Returns: TRUE when the transform object was created, FALSE otherwise.
  */
 static gboolean
 gst_mf_audio_decoder_open (GstAudioDecoder * dec)
 {
  GstMFAudioDecoder *self = GST_MF_AUDIO_DECODER (dec);
  GstMFAudioDecoderClass *mf_class = GST_MF_AUDIO_DECODER_GET_CLASS (dec);
  MFT_REGISTER_TYPE_INFO in_info;
  GstMFTransformEnumParams params = { 0, };

  in_info.guidMajorType = MFMediaType_Audio;
  in_info.guidSubtype = mf_class->codec_id;

  params.category = MFT_CATEGORY_AUDIO_DECODER;
  params.input_typeinfo = &in_info;
  params.enum_flags = mf_class->enum_flags;
  params.device_index = mf_class->device_index;

  GST_DEBUG_OBJECT (self, "Create MFT with enum flags 0x%x, device index %d",
      mf_class->enum_flags, mf_class->device_index);

  self->transform = gst_mf_transform_new (&params);
  if (self->transform)
    return TRUE;

  GST_ERROR_OBJECT (self, "Cannot create MFT object");
  return FALSE;
 }
 /* GstAudioDecoder::close implementation: drops the reference to the MFT
  * object and resets the pointer. Always returns TRUE. */
 static gboolean
 gst_mf_audio_decoder_close (GstAudioDecoder * dec)
 {
  GstMFAudioDecoder *decoder = GST_MF_AUDIO_DECODER (dec);
  GstMFTransform **slot = &decoder->transform;

  gst_clear_object (slot);

  return TRUE;
 }
 /* GstAudioDecoder::set_format implementation.
  *
  * Drains the decoder, opens the MFT and lets the subclass configure it for
  * @caps through its set_format class method.
  *
  * Returns: TRUE when the MFT was opened and the subclass accepted @caps.
  */
 static gboolean
 gst_mf_audio_decoder_set_format (GstAudioDecoder * dec, GstCaps * caps)
 {
  GstMFAudioDecoderClass *mf_class = GST_MF_AUDIO_DECODER_GET_CLASS (dec);
  GstMFAudioDecoder *self = GST_MF_AUDIO_DECODER (dec);
  gboolean configured = FALSE;

  g_assert (mf_class->set_format != nullptr);

  GST_DEBUG_OBJECT (self, "Set format");

  /* Push out what the previous configuration still holds */
  gst_mf_audio_decoder_drain (dec);

  if (!gst_mf_transform_open (self->transform)) {
    GST_ERROR_OBJECT (self, "Failed to open MFT");
  } else if (!mf_class->set_format (self, self->transform, caps)) {
    GST_ERROR_OBJECT (self, "Failed to set format");
  } else {
    configured = TRUE;
  }

  return configured;
 }
 /* Copies the payload of @buffer into a new single-buffer IMFSample and
  * feeds that sample to the MFT.
  *
  * Returns: TRUE when the transform accepted the sample, FALSE on any mapping,
  * Media Foundation or transform failure.
  */
 static gboolean
 gst_mf_audio_decoder_process_input (GstMFAudioDecoder * self,
    GstBuffer * buffer)
 {
  ComPtr < IMFSample > mf_sample;
  ComPtr < IMFMediaBuffer > mf_buffer;
  GstMapInfo map;
  BYTE *dest;
  HRESULT hr;
  gboolean ok = FALSE;

  if (!gst_buffer_map (buffer, &map, GST_MAP_READ)) {
    GST_ELEMENT_ERROR (self,
        RESOURCE, READ, ("Couldn't map input buffer"), (nullptr));
    return FALSE;
  }

  GST_TRACE_OBJECT (self, "Process buffer %" GST_PTR_FORMAT, buffer);

  /* Single pass block: any failure leaves it early with ok still FALSE, so
   * that the buffer is unmapped in exactly one place */
  do {
    hr = MFCreateSample (&mf_sample);
    if (!gst_mf_result (hr))
      break;

    hr = MFCreateMemoryBuffer (map.size, &mf_buffer);
    if (!gst_mf_result (hr))
      break;

    hr = mf_buffer->Lock (&dest, nullptr, nullptr);
    if (!gst_mf_result (hr))
      break;

    memcpy (dest, map.data, map.size);
    mf_buffer->Unlock ();

    hr = mf_buffer->SetCurrentLength (map.size);
    if (!gst_mf_result (hr))
      break;

    hr = mf_sample->AddBuffer (mf_buffer.Get ());
    if (!gst_mf_result (hr))
      break;

    if (!gst_mf_transform_process_input (self->transform, mf_sample.Get ())) {
      GST_ERROR_OBJECT (self, "Failed to process input");
      break;
    }

    ok = TRUE;
  } while (0);

  gst_buffer_unmap (buffer, &map);

  return ok;
 }
 /* Fetches one output sample from the MFT and, when it carries data, pushes
  * it downstream as one finished frame.
  *
  * Returns: the flow return of gst_mf_transform_get_output() when that is not
  * GST_FLOW_OK, GST_FLOW_ERROR on Media Foundation failures, GST_FLOW_OK for
  * an empty media buffer, otherwise the result of
  * gst_audio_decoder_finish_frame().
  */
 static GstFlowReturn
 gst_mf_audio_decoder_process_output (GstMFAudioDecoder * self)
 {
  ComPtr < IMFMediaBuffer > mf_buffer;
  ComPtr < IMFSample > mf_sample;
  GstBuffer *outbuf;
  GstFlowReturn flow;
  BYTE *pcm = nullptr;
  DWORD pcm_size = 0;
  HRESULT hr;

  flow = gst_mf_transform_get_output (self->transform, &mf_sample);
  if (flow != GST_FLOW_OK)
    return flow;

  hr = mf_sample->GetBufferByIndex (0, &mf_buffer);
  if (!gst_mf_result (hr))
    return GST_FLOW_ERROR;

  hr = mf_buffer->Lock (&pcm, nullptr, &pcm_size);
  if (!gst_mf_result (hr))
    return GST_FLOW_ERROR;

  if (pcm && pcm_size != 0) {
    outbuf = gst_audio_decoder_allocate_output_buffer (GST_AUDIO_DECODER (self),
        pcm_size);
    gst_buffer_fill (outbuf, 0, pcm, pcm_size);
    mf_buffer->Unlock ();

    return gst_audio_decoder_finish_frame (GST_AUDIO_DECODER (self), outbuf,
        1);
  }

  /* Can happen while draining */
  GST_DEBUG_OBJECT (self, "Empty media buffer");
  mf_buffer->Unlock ();

  return GST_FLOW_OK;
 }
 /* GstAudioDecoder::handle_frame implementation.
  *
  * A NULL @buffer triggers a drain. Otherwise @buffer is fed to the MFT and
  * output is collected until the transform stops returning GST_FLOW_OK.
  *
  * Returns: GST_FLOW_OK when the transform merely needs more data, the first
  * non-OK flow return otherwise.
  */
 static GstFlowReturn
 gst_mf_audio_decoder_handle_frame (GstAudioDecoder * dec, GstBuffer * buffer)
 {
  GstMFAudioDecoder *self = GST_MF_AUDIO_DECODER (dec);
  GstFlowReturn flow;

  if (buffer == nullptr)
    return gst_mf_audio_decoder_drain (dec);

  if (!gst_mf_audio_decoder_process_input (self, buffer)) {
    GST_ERROR_OBJECT (self, "Failed to process input");
    return GST_FLOW_ERROR;
  }

  for (;;) {
    flow = gst_mf_audio_decoder_process_output (self);
    if (flow != GST_FLOW_OK)
      break;
  }

  /* Running out of input is the regular way for the loop to end */
  if (flow == GST_MF_TRANSFORM_FLOW_NEED_DATA)
    return GST_FLOW_OK;

  return flow;
 }
 /* Asks the MFT to drain and pushes out everything it still returns.
  *
  * Returns: GST_FLOW_OK when there is no transform or when the transform ends
  * up needing more data, the first other non-OK flow return otherwise.
  */
 static GstFlowReturn
 gst_mf_audio_decoder_drain (GstAudioDecoder * dec)
 {
  GstMFAudioDecoder *self = GST_MF_AUDIO_DECODER (dec);
  GstFlowReturn flow = GST_FLOW_OK;

  if (self->transform == nullptr)
    return GST_FLOW_OK;

  gst_mf_transform_drain (self->transform);

  for (;;) {
    flow = gst_mf_audio_decoder_process_output (self);
    if (flow != GST_FLOW_OK)
      break;
  }

  if (flow == GST_MF_TRANSFORM_FLOW_NEED_DATA)
    return GST_FLOW_OK;

  return flow;
 }
 /* GstAudioDecoder::flush implementation: flushes the MFT when one exists.
  * The @hard argument is not used. */
 static void
 gst_mf_audio_decoder_flush (GstAudioDecoder * dec, gboolean hard)
 {
  GstMFAudioDecoder *decoder = GST_MF_AUDIO_DECODER (dec);

  if (decoder->transform != nullptr)
    gst_mf_transform_flush (decoder->transform);
 }
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaudiodecoder.h
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfaudiodecoder.h
@ -0,0 +1,69 @@
 /* GStreamer
 * Copyright (C) 2022 Seungha Yang <seungha@centricular.com>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with this library; if not, write to the
 * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
 * Boston, MA 02110-1301, USA.
 */
 #pragma once
 #include <gst/gst.h>
 #include <gst/audio/audio.h>
 #include "gstmfutils.h"
 #include "gstmftransform.h"
 G_BEGIN_DECLS
 /* Standard GObject type, cast and type-check macros for GstMFAudioDecoder */
 #define GST_TYPE_MF_AUDIO_DECODER           (gst_mf_audio_decoder_get_type())
 #define GST_MF_AUDIO_DECODER(obj)           (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_MF_AUDIO_DECODER,GstMFAudioDecoder))
 #define GST_MF_AUDIO_DECODER_CLASS(klass)   (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_MF_AUDIO_DECODER,GstMFAudioDecoderClass))
 #define GST_MF_AUDIO_DECODER_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_MF_AUDIO_DECODER,GstMFAudioDecoderClass))
 #define GST_IS_MF_AUDIO_DECODER(obj)        (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_MF_AUDIO_DECODER))
 /* The parameter has to be named klass: the expansion refers to (klass) */
 #define GST_IS_MF_AUDIO_DECODER_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_MF_AUDIO_DECODER))
 typedef struct _GstMFAudioDecoder GstMFAudioDecoder;
 typedef struct _GstMFAudioDecoderClass GstMFAudioDecoderClass;
 /* Per-element data passed from type registration to the subclass'
  * class_init as class data. */
 typedef struct
 {
  /* Name of the MFT, used to build the element's long name */
  gchar * device_name;
  /* Flags used when enumerating MFTs */
  guint32 enum_flags;
  /* Index of the MFT within the enumeration */
  guint device_index;
 } GstMFAudioDecoderClassData;
 /* Instance structure of the abstract Media Foundation audio decoder. */
 struct _GstMFAudioDecoder
 {
  GstAudioDecoder parent;
  /* MFT wrapper; created in open(), cleared in close() */
  GstMFTransform *transform;
 };
 /* Class structure; subclasses fill every member in their class_init. */
 struct _GstMFAudioDecoderClass
 {
  GstAudioDecoderClass parent_class;
  /* Media Foundation subtype GUID of the compressed input format */
  GUID codec_id;
  /* Enumeration flags used by open() to create the MFT */
  guint32 enum_flags;
  /* Enumeration index used by open() to create the MFT */
  guint device_index;
  /* Mandatory: configures @transform for @caps; called by the base class
   * set_format after the transform was opened. Returns FALSE on failure. */
  gboolean (*set_format)      (GstMFAudioDecoder * decoder,
                               GstMFTransform * transform,
                               GstCaps * caps);
 };
 GType gst_mf_audio_decoder_get_type (void);
 G_END_DECLS
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfutils.cpp
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfutils.cpp
@ -343,6 +343,198 @@ gst_mf_media_type_to_video_caps (IMFMediaType * media_type)
  return caps;
 }
 /* Desktop only defines.
  * Every fallback is guarded with #ifndef so that a definition already
  * provided by the platform headers is never redefined. */
 #ifndef KSAUDIO_SPEAKER_MONO
 #define KSAUDIO_SPEAKER_MONO            (SPEAKER_FRONT_CENTER)
 #endif
 #ifndef KSAUDIO_SPEAKER_1POINT1
 #define KSAUDIO_SPEAKER_1POINT1         (SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY)
 #endif
 #ifndef KSAUDIO_SPEAKER_STEREO
 #define KSAUDIO_SPEAKER_STEREO          (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT)
 #endif
 #ifndef KSAUDIO_SPEAKER_2POINT1
 #define KSAUDIO_SPEAKER_2POINT1         (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_LOW_FREQUENCY)
 #endif
 #ifndef KSAUDIO_SPEAKER_3POINT0
 #define KSAUDIO_SPEAKER_3POINT0         (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER)
 #endif
 #ifndef KSAUDIO_SPEAKER_3POINT1
 #define KSAUDIO_SPEAKER_3POINT1         (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | \
                                         SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY)
 #endif
 #ifndef KSAUDIO_SPEAKER_QUAD
 #define KSAUDIO_SPEAKER_QUAD            (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | \
                                         SPEAKER_BACK_LEFT  | SPEAKER_BACK_RIGHT)
 #endif
 #ifndef KSAUDIO_SPEAKER_SURROUND
 #define KSAUDIO_SPEAKER_SURROUND        (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | \
                                         SPEAKER_FRONT_CENTER | SPEAKER_BACK_CENTER)
 #endif
 #ifndef KSAUDIO_SPEAKER_5POINT0
 #define KSAUDIO_SPEAKER_5POINT0         (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER | \
                                         SPEAKER_SIDE_LEFT  | SPEAKER_SIDE_RIGHT)
 #endif
 #ifndef KSAUDIO_SPEAKER_5POINT1
 #define KSAUDIO_SPEAKER_5POINT1         (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | \
                                         SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY | \
                                         SPEAKER_BACK_LEFT  | SPEAKER_BACK_RIGHT)
 #endif
 #ifndef KSAUDIO_SPEAKER_7POINT0
 #define KSAUDIO_SPEAKER_7POINT0         (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | SPEAKER_FRONT_CENTER | \
                                         SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT | \
                                         SPEAKER_SIDE_LEFT | SPEAKER_SIDE_RIGHT)
 #endif
 #ifndef KSAUDIO_SPEAKER_7POINT1
 #define KSAUDIO_SPEAKER_7POINT1         (SPEAKER_FRONT_LEFT | SPEAKER_FRONT_RIGHT | \
                                         SPEAKER_FRONT_CENTER | SPEAKER_LOW_FREQUENCY | \
                                         SPEAKER_BACK_LEFT | SPEAKER_BACK_RIGHT | \
                                         SPEAKER_FRONT_LEFT_OF_CENTER | SPEAKER_FRONT_RIGHT_OF_CENTER)
 #endif
 /* Maps each SPEAKER_* flag to the matching GStreamer channel position.
  * gst_mf_media_audio_channel_mask_to_position() walks this table from top
  * to bottom, so the entry order decides in which order the positions are
  * handed out to the channels. */
 static struct
 {
  guint64 mf_pos;
  GstAudioChannelPosition gst_pos;
 } mf_to_gst_pos[] = {
  {SPEAKER_FRONT_LEFT, GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT},
  {SPEAKER_FRONT_RIGHT, GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT},
  {SPEAKER_FRONT_CENTER, GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER},
  {SPEAKER_LOW_FREQUENCY, GST_AUDIO_CHANNEL_POSITION_LFE1},
  {SPEAKER_BACK_LEFT, GST_AUDIO_CHANNEL_POSITION_REAR_LEFT},
  {SPEAKER_BACK_RIGHT, GST_AUDIO_CHANNEL_POSITION_REAR_RIGHT},
  {SPEAKER_FRONT_LEFT_OF_CENTER,
      GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT_OF_CENTER},
  {SPEAKER_FRONT_RIGHT_OF_CENTER,
      GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT_OF_CENTER},
  {SPEAKER_BACK_CENTER, GST_AUDIO_CHANNEL_POSITION_REAR_CENTER},
  /* Enum values diverge from this point onwards */
  {SPEAKER_SIDE_LEFT, GST_AUDIO_CHANNEL_POSITION_SIDE_LEFT},
  {SPEAKER_SIDE_RIGHT, GST_AUDIO_CHANNEL_POSITION_SIDE_RIGHT},
  {SPEAKER_TOP_CENTER, GST_AUDIO_CHANNEL_POSITION_TOP_CENTER},
  {SPEAKER_TOP_FRONT_LEFT, GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_LEFT},
  {SPEAKER_TOP_FRONT_CENTER, GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_CENTER},
  {SPEAKER_TOP_FRONT_RIGHT, GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_RIGHT},
  {SPEAKER_TOP_BACK_LEFT, GST_AUDIO_CHANNEL_POSITION_TOP_REAR_LEFT},
  {SPEAKER_TOP_BACK_CENTER, GST_AUDIO_CHANNEL_POSITION_TOP_REAR_CENTER},
  {SPEAKER_TOP_BACK_RIGHT, GST_AUDIO_CHANNEL_POSITION_TOP_REAR_RIGHT}
 };
 /* *INDENT-OFF* */
 /* Default channel mask per channel count: the array index is the number of
  * channels (0 to 8). Used when a media type carries no channel mask. */
 static DWORD default_ch_masks[] = {
  /* 0ch */
  0,
  /* 1ch */
  KSAUDIO_SPEAKER_MONO,
  /* 2ch */
  KSAUDIO_SPEAKER_STEREO,
  /* 3ch, 2.1 layout */
  /* KSAUDIO_SPEAKER_3POINT0 ? */
  KSAUDIO_SPEAKER_2POINT1,
  /* 4ch */
  /* KSAUDIO_SPEAKER_3POINT1 or KSAUDIO_SPEAKER_SURROUND ? */
  KSAUDIO_SPEAKER_QUAD,
  /* 5ch */
  KSAUDIO_SPEAKER_5POINT0,
  /* 6ch, 5.1 layout */
  KSAUDIO_SPEAKER_5POINT1,
  /* 7ch */
  KSAUDIO_SPEAKER_7POINT0,
  /* 8ch, 7.1 layout */
  KSAUDIO_SPEAKER_7POINT1,
 };
 /* *INDENT-ON* */
 /* Converts a speaker @mask into GStreamer channel positions.
  *
  * For every mf_to_gst_pos entry whose flag is set in @mask, in table order,
  * the next slot of @position is filled, until @channels slots were written
  * or the table is exhausted. Slots that are not reached are left untouched.
  */
 static void
 gst_mf_media_audio_channel_mask_to_position (guint channels, DWORD mask,
    GstAudioChannelPosition * position)
 {
  guint filled = 0;

  for (guint idx = 0; idx < G_N_ELEMENTS (mf_to_gst_pos); idx++) {
    if (filled >= channels)
      break;

    if ((mask & mf_to_gst_pos[idx].mf_pos) != 0)
      position[filled++] = mf_to_gst_pos[idx].gst_pos;
  }
 }
 /* Builds raw audio caps from a PCM or float Media Foundation audio type.
  *
  * The sample format comes from the subtype and bits per sample, channel
  * positions from MF_MT_AUDIO_CHANNEL_MASK. Without a mask, mono and stereo
  * get their fixed positions and 3 to 8 channels use default_ch_masks.
  *
  * Returns: new caps, or nullptr when the subtype is unsupported, a required
  * attribute is missing or zero, or no channel layout can be determined.
  */
 static GstCaps *
 gst_mf_media_type_to_audio_caps (IMFMediaType * media_type)
 {
  GstAudioChannelPosition position[64];
  GstAudioFormat format = GST_AUDIO_FORMAT_UNKNOWN;
  GstAudioInfo info;
  UINT32 bits, rate, channels, mask;
  gboolean is_pcm;
  GUID subtype;
  HRESULT hr;

  hr = media_type->GetGUID (MF_MT_SUBTYPE, &subtype);
  if (FAILED (hr)) {
    GST_WARNING ("failed to get subtype, hr: 0x%x", (guint) hr);
    return nullptr;
  }

  is_pcm = IsEqualGUID (subtype, MFAudioFormat_PCM);
  if (!is_pcm && !IsEqualGUID (subtype, MFAudioFormat_Float)) {
    GST_FIXME ("Unknown subtype");
    return nullptr;
  }

  hr = media_type->GetUINT32 (MF_MT_AUDIO_BITS_PER_SAMPLE, &bits);
  if (FAILED (hr)) {
    GST_WARNING ("Failed to get bps, hr: 0x%x", (guint) hr);
    return nullptr;
  }

  if (is_pcm) {
    format = gst_audio_format_build_integer (TRUE, G_LITTLE_ENDIAN, bits,
        bits);
  } else {
    /* Float subtype: only 32 and 64 bit samples map to a GStreamer format */
    switch (bits) {
      case 32:
        format = GST_AUDIO_FORMAT_F32LE;
        break;
      case 64:
        format = GST_AUDIO_FORMAT_F64LE;
        break;
      default:
        break;
    }
  }

  if (format == GST_AUDIO_FORMAT_UNKNOWN) {
    GST_WARNING ("Unknown audio format");
    return nullptr;
  }

  hr = media_type->GetUINT32 (MF_MT_AUDIO_NUM_CHANNELS, &channels);
  if (FAILED (hr) || channels == 0) {
    GST_WARNING ("Unknown channels");
    return nullptr;
  }

  hr = media_type->GetUINT32 (MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate);
  if (FAILED (hr) || rate == 0) {
    GST_WARNING ("Unknown rate");
    return nullptr;
  }

  /* Start with every slot unpositioned */
  for (guint n = 0; n < G_N_ELEMENTS (position); n++)
    position[n] = GST_AUDIO_CHANNEL_POSITION_NONE;

  hr = media_type->GetUINT32 (MF_MT_AUDIO_CHANNEL_MASK, &mask);
  if (SUCCEEDED (hr)) {
    gst_mf_media_audio_channel_mask_to_position (channels, mask, position);
  } else if (channels == 1) {
    position[0] = GST_AUDIO_CHANNEL_POSITION_MONO;
  } else if (channels == 2) {
    position[0] = GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT;
    position[1] = GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT;
  } else if (channels <= 8) {
    GST_WARNING ("Unknown channel position, use default value");
    gst_mf_media_audio_channel_mask_to_position (channels,
        default_ch_masks[channels], position);
  } else {
    GST_WARNING ("Failed to determine channel position");
    return nullptr;
  }

  gst_audio_info_set_format (&info, format, rate, channels, position);

  return gst_audio_info_to_caps (&info);
 }
 GstCaps *
 gst_mf_media_type_to_caps (IMFMediaType * media_type)
 {
@ -357,8 +549,11 @@ gst_mf_media_type_to_caps (IMFMediaType * media_type)
    return nullptr;
  }
-  if (IsEqualGUID (major_type, MFMediaType_Video))
+  if (IsEqualGUID (major_type, MFMediaType_Video)) {
    return gst_mf_media_type_to_video_caps (media_type);
  } else if (IsEqualGUID (major_type, MFMediaType_Audio)) {
    return gst_mf_media_type_to_audio_caps (media_type);
  }
  return nullptr;
 }
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfutils.h
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/gstmfutils.h
@ -23,6 +23,7 @@
 #include <gst/gst.h>
 #include <gst/video/video.h>
 #include <gst/audio/audio.h>
 #ifndef INITGUID
 #include <initguid.h>
@ -75,4 +76,4 @@ void           _gst_mf_dump_attributes (IMFAttributes * attr,
 G_END_DECLS
-#endif /* __GST_MF_UTILS_H__ */
+#endif /* __GST_MF_UTILS_H__ */
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/meson.build
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/meson.build
@ -1,5 +1,7 @@
 mf_sources = [
  'gstmfaacdec.cpp',
  'gstmfaacenc.cpp',
  'gstmfaudiodecoder.cpp',
  'gstmfaudioencoder.cpp',
  'gstmfdevice.cpp',
  'gstmfh264enc.cpp',
--- a/subprojects/gst-plugins-bad/sys/mediafoundation/plugin.cpp
+++ b/subprojects/gst-plugins-bad/sys/mediafoundation/plugin.cpp
@ -72,6 +72,7 @@
 #include "gstmfvp9enc.h"
 #include "gstmfaacenc.h"
 #include "gstmfmp3enc.h"
 #include "gstmfaacdec.h"
 #if GST_MF_HAVE_D3D11
 #include <gst/d3d11/gstd3d11.h>
@ -237,6 +238,7 @@ plugin_init (GstPlugin * plugin)
  gst_mf_aac_enc_plugin_init (plugin, GST_RANK_SECONDARY);
  gst_mf_mp3_enc_plugin_init (plugin, GST_RANK_SECONDARY);
  gst_mf_aac_dec_plugin_init (plugin, GST_RANK_SECONDARY);
  /* So that call MFShutdown() when this plugin is no more used
   * (i.e., gst_deinit). Otherwise valgrind-like tools would complain