mediafoundation: Add support for AAC encoding

Add a MediaFoundation AAC encoder element. Before Windows 10, the AAC encoder MFT supported mono and stereo channel configurations; Windows 10 introduced 5.1 channel support. To expose the correct range of supported formats regardless of the OS version, this element enumerates all formats supported by the AAC encoder MFT and configures its sink/src pad templates accordingly during plugin init. Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-bad/-/merge_requests/1280>
2025-03-13 07:02:53 +00:00 · 2020-05-18 18:12:38 +09:00 · 2020-05-18 18:12:38 +09:00 · cee619486a
commit cee619486a
parent 18f5bdee16
8 changed files with 1244 additions and 7 deletions
--- a/sys/mediafoundation/gstmfaacenc.cpp
+++ b/sys/mediafoundation/gstmfaacenc.cpp
@ -0,0 +1,743 @@
+/* GStreamer
+ * Copyright (C) 2020 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+/**
+ * SECTION:element-mfaacenc
+ * @title: mfaacenc
+ *
+ * This element encodes raw audio into AAC compressed data.
+ *
+ * ## Example pipelines
+ * |[
+ * gst-launch-1.0 -v audiotestsrc ! mfaacenc ! aacparse ! qtmux ! filesink location=audiotestsrc.mp4
+ * ]| This example pipeline will encode a test audio source to AAC using
+ * Media Foundation encoder, and muxes it in a mp4 container.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gst/gst.h>
+#include <gst/pbutils/pbutils.h>
+#include "gstmfaudioenc.h"
+#include "gstmfaacenc.h"
+#include <wrl.h>
+#include <set>
+#include <vector>
+#include <string>
+
+using namespace Microsoft::WRL;
+
+GST_DEBUG_CATEGORY (gst_mf_aac_enc_debug);
+#define GST_CAT_DEFAULT gst_mf_aac_enc_debug
+
+/* GObject property identifiers of the AAC encoder element */
+enum
+{
+  PROP_0,
+  PROP_BITRATE,
+};
+
+/* Default value of the "bitrate" property; 0 selects the bitrate
+ * automatically */
+#define DEFAULT_BITRATE (0)
+
+/* Instance structure of the Media Foundation AAC encoder element */
+typedef struct _GstMFAacEnc
+{
+  GstMFAudioEnc parent;
+
+  /* properties */
+  /* requested bitrate in bit/sec, 0 means automatic selection */
+  guint bitrate;
+} GstMFAacEnc;
+
+/* Class structure of the AAC encoder; adds nothing on top of the
+ * GstMFAudioEnc base class */
+typedef struct _GstMFAacEncClass
+{
+  GstMFAudioEncClass parent_class;
+
+} GstMFAacEncClass;
+
+/* Per-type data collected at plugin init time by gst_mf_aac_enc_register()
+ * and handed to gst_mf_aac_enc_class_init() as class_data, which releases
+ * it once the class has been set up */
+typedef struct
+{
+  /* caps used for the sink/src pad templates */
+  GstCaps *sink_caps;
+  GstCaps *src_caps;
+  /* device name of the MFT, used for the element long name */
+  gchar *device_name;
+  /* MFT enumeration flags and index used to find the same MFT again */
+  guint32 enum_flags;
+  guint device_index;
+  /* bitrates (bit/sec) reported by the MFT, listed in the property blurb */
+  std::set<UINT32> bitrate_list;
+} GstMFAacEncClassData;
+
+static GstElementClass *parent_class = NULL;
+
+static void gst_mf_aac_enc_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec);
+static void gst_mf_aac_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec);
+static gboolean gst_mf_aac_enc_get_output_type (GstMFAudioEnc * mfenc,
+    GstAudioInfo * info, IMFMediaType ** output_type);
+static gboolean gst_mf_aac_enc_get_input_type (GstMFAudioEnc * mfenc,
+    GstAudioInfo * info, IMFMediaType ** input_type);
+static gboolean gst_mf_aac_enc_set_src_caps (GstMFAudioEnc * mfenc,
+    GstAudioInfo * info);
+
+/* Class initializer of a dynamically registered AAC encoder type.
+ *
+ * @data is the GstMFAacEncClassData prepared by gst_mf_aac_enc_register().
+ * It provides the pad template caps, the device description and the list of
+ * supported bitrates, and it is destroyed at the end of this function. */
+static void
+gst_mf_aac_enc_class_init (GstMFAacEncClass * klass, gpointer data)
+{
+  GstMFAacEncClassData *cdata = (GstMFAacEncClassData *) data;
+  GObjectClass *object_class = G_OBJECT_CLASS (klass);
+  GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
+  GstMFAudioEncClass *audioenc_class = GST_MF_AUDIO_ENC_CLASS (klass);
+  const gboolean is_hardware =
+      (cdata->enum_flags & MFT_ENUM_FLAG_HARDWARE) == MFT_ENUM_FLAG_HARDWARE;
+  std::string blurb ("Bitrate in bit/sec, (0 = auto), valid values are { 0");
+  guint max_bitrate = 0;
+  GstPadTemplate *sink_templ;
+  GstPadTemplate *src_templ;
+  gchar *element_long_name;
+  gchar *element_klass;
+
+  parent_class = (GstElementClass *) g_type_class_peek_parent (klass);
+
+  object_class->get_property = gst_mf_aac_enc_get_property;
+  object_class->set_property = gst_mf_aac_enc_set_property;
+
+  /* Spell out every supported bitrate in the property description */
+  for (const UINT32 supported : cdata->bitrate_list) {
+    blurb += ", ";
+    blurb += std::to_string (supported);
+  }
+  blurb += " }";
+
+  /* std::set<> is ordered, so the last element is the largest bitrate */
+  if (!cdata->bitrate_list.empty ())
+    max_bitrate = *cdata->bitrate_list.rbegin ();
+
+  g_object_class_install_property (object_class, PROP_BITRATE,
+      g_param_spec_uint ("bitrate", "Bitrate", blurb.c_str (), 0,
+          max_bitrate, DEFAULT_BITRATE,
+          (GParamFlags) (GST_PARAM_MUTABLE_READY | G_PARAM_READWRITE |
+              G_PARAM_STATIC_NAME | G_PARAM_STATIC_NICK)));
+
+  element_long_name =
+      g_strdup_printf ("Media Foundation %s", cdata->device_name);
+  element_klass = g_strdup_printf ("Codec/Encoder/Audio%s",
+      is_hardware ? "/Hardware" : "");
+  gst_element_class_set_metadata (element_class, element_long_name,
+      element_klass,
+      "Microsoft Media Foundation AAC Encoder",
+      "Seungha Yang <seungha@centricular.com>");
+  g_free (element_long_name);
+  g_free (element_klass);
+
+  sink_templ = gst_pad_template_new ("sink", GST_PAD_SINK, GST_PAD_ALWAYS,
+      cdata->sink_caps);
+  gst_element_class_add_pad_template (element_class, sink_templ);
+  src_templ = gst_pad_template_new ("src", GST_PAD_SRC, GST_PAD_ALWAYS,
+      cdata->src_caps);
+  gst_element_class_add_pad_template (element_class, src_templ);
+
+  /* Hooks called by the GstMFAudioEnc base class */
+  audioenc_class->get_output_type =
+      GST_DEBUG_FUNCPTR (gst_mf_aac_enc_get_output_type);
+  audioenc_class->get_input_type =
+      GST_DEBUG_FUNCPTR (gst_mf_aac_enc_get_input_type);
+  audioenc_class->set_src_caps =
+      GST_DEBUG_FUNCPTR (gst_mf_aac_enc_set_src_caps);
+
+  /* Parameters the base class uses to locate and drive the MFT */
+  audioenc_class->codec_id = MFAudioFormat_AAC;
+  audioenc_class->enum_flags = cdata->enum_flags;
+  audioenc_class->device_index = cdata->device_index;
+  audioenc_class->frame_samples = 1024;
+
+  /* The class data is not needed anymore */
+  g_free (cdata->device_name);
+  gst_caps_unref (cdata->sink_caps);
+  gst_caps_unref (cdata->src_caps);
+  delete cdata;
+}
+
+/* Instance initializer: starts with the default (automatic) bitrate */
+static void
+gst_mf_aac_enc_init (GstMFAacEnc * self)
+{
+  self->bitrate = DEFAULT_BITRATE;
+}
+
+/* GObject property getter; only "bitrate" is supported */
+static void
+gst_mf_aac_enc_get_property (GObject * object, guint prop_id,
+    GValue * value, GParamSpec * pspec)
+{
+  GstMFAacEnc *enc = (GstMFAacEnc *) (object);
+
+  if (prop_id == PROP_BITRATE) {
+    g_value_set_uint (value, enc->bitrate);
+    return;
+  }
+
+  G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+}
+
+/* GObject property setter; only "bitrate" is supported */
+static void
+gst_mf_aac_enc_set_property (GObject * object, guint prop_id,
+    const GValue * value, GParamSpec * pspec)
+{
+  GstMFAacEnc *enc = (GstMFAacEnc *) (object);
+
+  if (prop_id == PROP_BITRATE) {
+    enc->bitrate = g_value_get_uint (value);
+    return;
+  }
+
+  G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
+}
+
+/* Selects the AAC output media type to configure on the MFT.
+ *
+ * The output types offered by the transform are filtered by the channel
+ * count and sample rate of @info. Among the remaining candidates the one
+ * whose bitrate satisfies the "bitrate" property is chosen; when the
+ * property is 0 a default bitrate is derived from channels and sample rate.
+ *
+ * On success the selected type is stored in @output_type and the caller
+ * owns the returned reference.
+ *
+ * Returns: TRUE if a matching output type was found, FALSE otherwise */
+static gboolean
+gst_mf_aac_enc_get_output_type (GstMFAudioEnc * mfenc, GstAudioInfo * info,
+    IMFMediaType ** output_type)
+{
+  GstMFAacEnc *self = (GstMFAacEnc *) mfenc;
+  GstMFTransform *transform = mfenc->transform;
+  GList *output_list = NULL;
+  GList *iter;
+  ComPtr<IMFMediaType> target_output;
+  std::vector<ComPtr<IMFMediaType>> filtered_types;
+  std::set<UINT32> bitrate_list;
+  UINT32 bitrate;
+  UINT32 target_bitrate = 0;
+  HRESULT hr;
+
+  if (!gst_mf_transform_get_output_available_types (transform, &output_list)) {
+    GST_ERROR_OBJECT (self, "Couldn't get available output type");
+    return FALSE;
+  }
+
+  /* 1. Filtering based on channels and sample rate */
+  for (iter = output_list; iter; iter = g_list_next (iter)) {
+    IMFMediaType *type = (IMFMediaType *) iter->data;
+    GUID guid = GUID_NULL;
+    UINT32 value;
+
+    hr = type->GetGUID (MF_MT_MAJOR_TYPE, &guid);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (!IsEqualGUID (guid, MFMediaType_Audio)) {
+      GST_WARNING_OBJECT (self, "Major type is not audio");
+      continue;
+    }
+
+    hr = type->GetGUID (MF_MT_SUBTYPE, &guid);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (!IsEqualGUID (guid, MFAudioFormat_AAC)) {
+      GST_WARNING_OBJECT (self, "Sub type is not AAC");
+      continue;
+    }
+
+    hr = type->GetUINT32 (MF_MT_AUDIO_NUM_CHANNELS, &value);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (value != GST_AUDIO_INFO_CHANNELS (info))
+      continue;
+
+    hr = type->GetUINT32 (MF_MT_AUDIO_SAMPLES_PER_SECOND, &value);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (value != GST_AUDIO_INFO_RATE (info))
+      continue;
+
+    hr = type->GetUINT32 (MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &value);
+    if (!gst_mf_result (hr))
+      continue;
+
+    /* The ComPtr takes its own reference, the list keeps the original one */
+    filtered_types.push_back (type);
+    /* convert bytes to bit */
+    bitrate_list.insert (value * 8);
+  }
+
+  g_list_free_full (output_list, (GDestroyNotify) gst_mf_media_type_release);
+
+  if (filtered_types.empty ()) {
+    GST_ERROR_OBJECT (self, "Couldn't find target output type");
+    return FALSE;
+  }
+
+  /* size() is a size_t, which must not be passed for a plain int
+   * conversion; narrow it explicitly */
+  GST_DEBUG_OBJECT (self, "have %u candidate output",
+      (guint) filtered_types.size ());
+
+  /* 2. Find the best matching bitrate */
+  bitrate = self->bitrate;
+
+  /* Media Foundation AAC encoder supports sample-rate 44100 or 48000 */
+  if (bitrate == 0) {
+    /* http://wiki.hydrogenaud.io/index.php?title=Fraunhofer_FDK_AAC#Recommended_Sampling_Rate_and_Bitrate_Combinations
+     * was referenced, but the range supported by Media Foundation is much
+     * more limited than that */
+    if (GST_AUDIO_INFO_CHANNELS (info) == 1) {
+      if (GST_AUDIO_INFO_RATE (info) <= 44100) {
+        bitrate = 96000;
+      } else {
+        bitrate = 160000;
+      }
+    } else if (GST_AUDIO_INFO_CHANNELS (info) == 2) {
+      if (GST_AUDIO_INFO_RATE (info) <= 44100) {
+        bitrate = 112000;
+      } else {
+        bitrate = 320000;
+      }
+    } else {
+      /* 5.1 */
+      if (GST_AUDIO_INFO_RATE (info) <= 44100) {
+        bitrate = 240000;
+      } else {
+        bitrate = 320000;
+      }
+    }
+
+    GST_DEBUG_OBJECT (self, "Calculated bitrate %u", (guint) bitrate);
+  } else {
+    GST_DEBUG_OBJECT (self, "Requested bitrate %u", (guint) bitrate);
+  }
+
+  GST_DEBUG_OBJECT (self, "Available bitrates");
+  for (auto it: bitrate_list)
+    GST_DEBUG_OBJECT (self, "\t%u", (guint) it);
+
+  /* Based on the calculated or requested bitrate, pick the smallest
+   * supported bitrate that is not below it. If every supported bitrate is
+   * lower, fall back to the largest one. bitrate_list cannot be empty here
+   * because every entry of filtered_types contributed a value to it. */
+  {
+    const auto it = bitrate_list.lower_bound (bitrate);
+    if (it == bitrate_list.end ()) {
+      target_bitrate = *std::prev (it);
+    } else {
+      target_bitrate = *it;
+    }
+  }
+
+  GST_DEBUG_OBJECT (self, "Selected target bitrate %u",
+      (guint) target_bitrate);
+
+  /* 3. Pick the first candidate that carries the selected bitrate */
+  for (auto it: filtered_types) {
+    UINT32 value = 0;
+
+    it->GetUINT32 (MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &value);
+    if (value * 8 == target_bitrate) {
+      target_output = it;
+      break;
+    }
+  }
+
+  if (!target_output) {
+    GST_ERROR_OBJECT (self, "Failed to decide final output type");
+    return FALSE;
+  }
+
+  /* Hand our reference over to the caller */
+  *output_type = target_output.Detach ();
+
+  return TRUE;
+}
+
+/* Selects the PCM input media type to configure on the MFT.
+ *
+ * The input types offered by the transform are filtered by the channel
+ * count and sample rate of @info, and the first match is used.
+ *
+ * On success the selected type is stored in @input_type and the caller
+ * owns the returned reference.
+ *
+ * Returns: TRUE if a matching input type was found, FALSE otherwise */
+static gboolean
+gst_mf_aac_enc_get_input_type (GstMFAudioEnc * mfenc, GstAudioInfo * info,
+    IMFMediaType ** input_type)
+{
+  GstMFAacEnc *self = (GstMFAacEnc *) mfenc;
+  GstMFTransform *transform = mfenc->transform;
+  GList *input_list = NULL;
+  GList *iter;
+  ComPtr<IMFMediaType> target_input;
+  std::vector<ComPtr<IMFMediaType>> filtered_types;
+  HRESULT hr;
+
+  if (!gst_mf_transform_get_input_available_types (transform, &input_list)) {
+    GST_ERROR_OBJECT (self, "Couldn't get available input type");
+    return FALSE;
+  }
+
+  /* 1. Filtering based on channels and sample rate */
+  for (iter = input_list; iter; iter = g_list_next (iter)) {
+    IMFMediaType *type = (IMFMediaType *) iter->data;
+    GUID guid = GUID_NULL;
+    UINT32 value;
+
+    hr = type->GetGUID (MF_MT_MAJOR_TYPE, &guid);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (!IsEqualGUID (guid, MFMediaType_Audio)) {
+      GST_WARNING_OBJECT (self, "Major type is not audio");
+      continue;
+    }
+
+    hr = type->GetGUID (MF_MT_SUBTYPE, &guid);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (!IsEqualGUID (guid, MFAudioFormat_PCM)) {
+      GST_WARNING_OBJECT (self, "Sub type is not PCM");
+      continue;
+    }
+
+    hr = type->GetUINT32 (MF_MT_AUDIO_NUM_CHANNELS, &value);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (value != GST_AUDIO_INFO_CHANNELS (info))
+      continue;
+
+    hr = type->GetUINT32 (MF_MT_AUDIO_SAMPLES_PER_SECOND, &value);
+    if (!gst_mf_result (hr))
+      continue;
+
+    if (value != GST_AUDIO_INFO_RATE (info))
+      continue;
+
+    /* The ComPtr takes its own reference, the list keeps the original one */
+    filtered_types.push_back (type);
+  }
+
+  g_list_free_full (input_list, (GDestroyNotify) gst_mf_media_type_release);
+
+  if (filtered_types.empty ()) {
+    GST_ERROR_OBJECT (self, "Couldn't find target input type");
+    return FALSE;
+  }
+
+  /* size() is a size_t, which must not be passed for a plain int
+   * conversion; narrow it explicitly */
+  GST_DEBUG_OBJECT (self, "Total %u input types are available",
+      (guint) filtered_types.size ());
+
+  /* Just select the first one and hand a reference over to the caller */
+  target_input = filtered_types.front ();
+
+  *input_type = target_input.Detach ();
+
+  return TRUE;
+}
+
+/* Builds the source pad caps from the output type currently configured on
+ * the MFT and sets them as the output format of the audio encoder.
+ *
+ * The AudioSpecificConfig() found in MF_MT_USER_DATA is exported as
+ * "codec_data" and is also used to fill in the level and profile fields.
+ *
+ * Returns: TRUE if the output format was accepted, FALSE otherwise */
+static gboolean
+gst_mf_aac_enc_set_src_caps (GstMFAudioEnc * mfenc,
+    GstAudioInfo * info)
+{
+  GstMFAacEnc *self = (GstMFAacEnc *) mfenc;
+  HRESULT hr;
+  GstCaps *src_caps;
+  GstBuffer *codec_data;
+  UINT8 *blob = NULL;
+  UINT32 blob_size = 0;
+  gboolean ret;
+  ComPtr<IMFMediaType> output_type;
+  static const guint config_data_offset = 12;
+
+  if (!gst_mf_transform_get_output_current_type (mfenc->transform, &output_type)) {
+    GST_ERROR_OBJECT (self, "Couldn't get current output type");
+    return FALSE;
+  }
+
+  /* user data contains the portion of the HEAACWAVEINFO structure that appears
+   * after the WAVEFORMATEX structure (that is, after the wfx member).
+   * This is followed by the AudioSpecificConfig() data,
+   * as defined by ISO/IEC 14496-3.
+   * https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder
+   *
+   * The offset of the AudioSpecificConfig() data is 12 in this case
+   */
+  hr = output_type->GetBlobSize (MF_MT_USER_DATA, &blob_size);
+  if (!gst_mf_result (hr) || blob_size <= config_data_offset) {
+    /* exactly one conversion for the one argument that is passed */
+    GST_ERROR_OBJECT (self,
+        "Couldn't get size of MF_MT_USER_DATA, size %u", (guint) blob_size);
+    return FALSE;
+  }
+
+  hr = output_type->GetAllocatedBlob (MF_MT_USER_DATA, &blob, &blob_size);
+  if (!gst_mf_result (hr)) {
+    GST_ERROR_OBJECT (self, "Couldn't get user data blob");
+    return FALSE;
+  }
+
+  /* Copy only the AudioSpecificConfig() part into the codec_data buffer */
+  codec_data = gst_buffer_new_and_alloc (blob_size - config_data_offset);
+  gst_buffer_fill (codec_data, 0, blob + config_data_offset,
+      blob_size - config_data_offset);
+
+  src_caps = gst_caps_new_simple ("audio/mpeg",
+      "mpegversion", G_TYPE_INT, 4,
+      "stream-format", G_TYPE_STRING, "raw",
+      "channels", G_TYPE_INT, GST_AUDIO_INFO_CHANNELS (info),
+      "rate", G_TYPE_INT, GST_AUDIO_INFO_RATE (info),
+      "framed", G_TYPE_BOOLEAN, TRUE,
+      "codec_data", GST_TYPE_BUFFER, codec_data, NULL);
+  gst_buffer_unref (codec_data);
+
+  gst_codec_utils_aac_caps_set_level_and_profile (src_caps,
+      blob + config_data_offset, blob_size - config_data_offset);
+  /* The blob was allocated by GetAllocatedBlob() and is ours to free */
+  CoTaskMemFree (blob);
+
+  ret = gst_audio_encoder_set_output_format (GST_AUDIO_ENCODER (self), src_caps);
+  if (!ret) {
+    GST_WARNING_OBJECT (self,
+        "Couldn't set output format %" GST_PTR_FORMAT, src_caps);
+  }
+  gst_caps_unref (src_caps);
+
+  return ret;
+}
+
+/* Registers one AAC encoder GType and element feature for the given MFT.
+ *
+ * The first registered device gets the plain "GstMFAacEnc"/"mfaacenc"
+ * names; further devices get numbered names and a rank lowered by one.
+ * @sink_caps and @src_caps are stored in the class data, which
+ * gst_mf_aac_enc_class_init() receives as class_data. */
+static void
+gst_mf_aac_enc_register (GstPlugin * plugin, guint rank,
+    const gchar * device_name, guint32 enum_flags, guint device_index,
+    GstCaps * sink_caps, GstCaps * src_caps,
+    const std::set<UINT32> &bitrate_list)
+{
+  GTypeInfo type_info = {
+    sizeof (GstMFAacEncClass),
+    NULL,
+    NULL,
+    (GClassInitFunc) gst_mf_aac_enc_class_init,
+    NULL,
+    NULL,
+    sizeof (GstMFAacEnc),
+    0,
+    (GInstanceInitFunc) gst_mf_aac_enc_init,
+  };
+  GstMFAacEncClassData *class_data = new GstMFAacEncClassData;
+  gchar *gtype_name = g_strdup ("GstMFAacEnc");
+  gchar *element_name = g_strdup ("mfaacenc");
+  gboolean renamed = FALSE;
+  GType gtype;
+  gint suffix;
+
+  class_data->sink_caps = sink_caps;
+  class_data->src_caps = src_caps;
+  class_data->device_name = g_strdup (device_name);
+  class_data->enum_flags = enum_flags;
+  class_data->device_index = device_index;
+  class_data->bitrate_list = bitrate_list;
+  type_info.class_data = class_data;
+
+  /* Probe numbered names until one is found that is not registered yet */
+  for (suffix = 1; g_type_from_name (gtype_name) != 0; suffix++) {
+    g_free (gtype_name);
+    g_free (element_name);
+    gtype_name = g_strdup_printf ("GstMFAacDevice%dEnc", suffix);
+    element_name = g_strdup_printf ("mfaacdevice%denc", suffix);
+    renamed = TRUE;
+  }
+
+  gtype = g_type_register_static (GST_TYPE_MF_AUDIO_ENC, gtype_name,
+      &type_info, (GTypeFlags) 0);
+
+  /* make lower rank than default device */
+  if (renamed && rank > 0)
+    rank--;
+
+  if (!gst_element_register (plugin, element_name, rank, gtype))
+    GST_WARNING ("Failed to register plugin '%s'", gtype_name);
+
+  g_free (gtype_name);
+  g_free (element_name);
+}
+
+/* Probes one AAC encoder MFT and registers an element for it.
+ *
+ * Opens @transform, collects the channel counts, sample rates and bitrates
+ * of all AAC output types it offers, builds the sink/src template caps from
+ * them and calls gst_mf_aac_enc_register(). Nothing is registered when the
+ * MFT cannot be opened, has no device name or offers no usable output
+ * type. */
+static void
+gst_mf_aac_enc_plugin_init_internal (GstPlugin * plugin, guint rank,
+    GstMFTransform * transform, guint device_index, guint32 enum_flags)
+{
+  HRESULT hr;
+  gint i;
+  GstCaps *src_caps = NULL;
+  GstCaps *sink_caps = NULL;
+  gchar *device_name = NULL;
+  GList *output_list = NULL;
+  GList *iter;
+  std::set<UINT32> channels_list;
+  std::set<UINT32> rate_list;
+  std::set<UINT32> bitrate_list;
+  gboolean config_found = FALSE;
+  GValue channels_value = G_VALUE_INIT;
+  GValue rate_value = G_VALUE_INIT;
+
+  if (!gst_mf_transform_open (transform))
+    return;
+
+  g_object_get (transform, "device-name", &device_name, NULL);
+  if (!device_name) {
+    GST_WARNING_OBJECT (transform, "Unknown device name");
+    return;
+  }
+
+  if (!gst_mf_transform_get_output_available_types (transform, &output_list)) {
+    GST_WARNING_OBJECT (transform, "Couldn't get output types");
+    goto done;
+  }
+
+  /* g_list_length() returns a guint */
+  GST_INFO_OBJECT (transform, "Have %u output type",
+      g_list_length (output_list));
+
+  for (iter = output_list, i = 0; iter; iter = g_list_next (iter), i++) {
+    UINT32 channels, rate, bitrate;
+    GUID guid = GUID_NULL;
+    IMFMediaType *type = (IMFMediaType *) iter->data;
+#ifndef GST_DISABLE_GST_DEBUG
+    gchar *msg = g_strdup_printf ("Output IMFMediaType %d", i);
+    gst_mf_dump_attributes ((IMFAttributes *) type, msg, GST_LEVEL_TRACE);
+    g_free (msg);
+#endif
+
+    hr = type->GetGUID (MF_MT_MAJOR_TYPE, &guid);
+    if (!gst_mf_result (hr))
+      continue;
+
+    /* shouldn't happen */
+    if (!IsEqualGUID (guid, MFMediaType_Audio))
+      continue;
+
+    hr = type->GetGUID (MF_MT_SUBTYPE, &guid);
+    if (!gst_mf_result (hr))
+      continue;
+
+    /* shouldn't happen */
+    if (!IsEqualGUID (guid, MFAudioFormat_AAC))
+      continue;
+
+    /* Windows 10 additionally supports 6 (5.1) channels, so the channel
+     * configuration cannot be hard-coded */
+    hr = type->GetUINT32 (MF_MT_AUDIO_NUM_CHANNELS, &channels);
+    if (!gst_mf_result (hr))
+      continue;
+
+    hr = type->GetUINT32 (MF_MT_AUDIO_SAMPLES_PER_SECOND, &rate);
+    if (!gst_mf_result (hr))
+      continue;
+
+    /* NOTE: MFT AAC encoder seems to support more bitrates than documented
+     * at https://docs.microsoft.com/en-us/windows/win32/medfound/aac-encoder
+     * We will pass supported bitrate values to class init
+     */
+    hr = type->GetUINT32 (MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &bitrate);
+    if (!gst_mf_result (hr))
+      continue;
+
+    channels_list.insert (channels);
+    rate_list.insert (rate);
+    /* convert bytes to bit */
+    bitrate_list.insert (bitrate * 8);
+
+    config_found = TRUE;
+  }
+
+  if (!config_found) {
+    GST_WARNING_OBJECT (transform, "Couldn't find available configuration");
+    goto done;
+  }
+
+  src_caps =
+      gst_caps_from_string ("audio/mpeg, mpegversion = (int) 4, "
+      "stream-format = (string) raw, framed = (boolean) true, "
+      "base-profile = (string) lc");
+  sink_caps =
+      gst_caps_from_string ("audio/x-raw, layout = (string) interleaved, "
+      "format = (string) " GST_AUDIO_NE (S16));
+
+  g_value_init (&channels_value, GST_TYPE_LIST);
+  g_value_init (&rate_value, GST_TYPE_LIST);
+
+  for (auto it: channels_list) {
+    GValue channels = G_VALUE_INIT;
+
+    g_value_init (&channels, G_TYPE_INT);
+    g_value_set_int (&channels, (gint) it);
+    gst_value_list_append_and_take_value (&channels_value, &channels);
+  }
+
+  for (auto it: rate_list) {
+    GValue rate = G_VALUE_INIT;
+
+    g_value_init (&rate, G_TYPE_INT);
+    g_value_set_int (&rate, (gint) it);
+    gst_value_list_append_and_take_value (&rate_value, &rate);
+  }
+
+  gst_caps_set_value (src_caps, "channels", &channels_value);
+  gst_caps_set_value (sink_caps, "channels", &channels_value);
+
+  gst_caps_set_value (src_caps, "rate", &rate_value);
+  gst_caps_set_value (sink_caps, "rate", &rate_value);
+
+  /* The caps hold their own copies of the lists. Releasing ours here, and
+   * not at the "done" label, keeps g_value_unset() away from GValues that
+   * were never initialised on the early-exit paths. */
+  g_value_unset (&channels_value);
+  g_value_unset (&rate_value);
+
+  GST_MINI_OBJECT_FLAG_SET (sink_caps, GST_MINI_OBJECT_FLAG_MAY_BE_LEAKED);
+  GST_MINI_OBJECT_FLAG_SET (src_caps, GST_MINI_OBJECT_FLAG_MAY_BE_LEAKED);
+
+  /* The caps are stored in the class data by the register function */
+  gst_mf_aac_enc_register (plugin, rank, device_name, enum_flags, device_index,
+      sink_caps, src_caps, bitrate_list);
+
+ done:
+  if (output_list)
+    g_list_free_full (output_list, (GDestroyNotify) gst_mf_media_type_release);
+  g_free (device_name);
+}
+
+/* Walks over every MFT matching @enum_params, starting at device index 0,
+ * and tries to register an element for each of them. Stops at the first
+ * index for which no transform can be created. */
+static void
+gst_mf_aac_enc_register_devices (GstPlugin * plugin, guint rank,
+    GstMFTransformEnumParams * enum_params)
+{
+  gint i;
+
+  for (i = 0;; i++) {
+    GstMFTransform *transform;
+
+    enum_params->device_index = i;
+    transform = gst_mf_transform_new (enum_params);
+    if (!transform)
+      break;
+
+    gst_mf_aac_enc_plugin_init_internal (plugin, rank, transform,
+        enum_params->device_index, enum_params->enum_flags);
+    gst_clear_object (&transform);
+  }
+}
+
+/* Entry point called at plugin load: registers an AAC encoder element for
+ * every available Media Foundation AAC encoder MFT, hardware ones first
+ * and software ones afterwards, all with the given @rank as base rank. */
+void
+gst_mf_aac_enc_plugin_init (GstPlugin * plugin, guint rank)
+{
+  GstMFTransformEnumParams enum_params = { 0, };
+  MFT_REGISTER_TYPE_INFO output_type;
+
+  GST_DEBUG_CATEGORY_INIT (gst_mf_aac_enc_debug, "mfaacenc", 0, "mfaacenc");
+
+  output_type.guidMajorType = MFMediaType_Audio;
+  output_type.guidSubtype = MFAudioFormat_AAC;
+
+  enum_params.category = MFT_CATEGORY_AUDIO_ENCODER;
+  enum_params.output_typeinfo = &output_type;
+
+  /* register hardware encoders first (likely no hardware audio encoder) */
+  enum_params.enum_flags = (MFT_ENUM_FLAG_HARDWARE | MFT_ENUM_FLAG_ASYNCMFT |
+      MFT_ENUM_FLAG_SORTANDFILTER | MFT_ENUM_FLAG_SORTANDFILTER_APPROVED_ONLY);
+  gst_mf_aac_enc_register_devices (plugin, rank, &enum_params);
+
+  /* register software encoders */
+  enum_params.enum_flags = (MFT_ENUM_FLAG_SYNCMFT |
+      MFT_ENUM_FLAG_SORTANDFILTER | MFT_ENUM_FLAG_SORTANDFILTER_APPROVED_ONLY);
+  gst_mf_aac_enc_register_devices (plugin, rank, &enum_params);
+}
--- a/sys/mediafoundation/gstmfaacenc.h
+++ b/sys/mediafoundation/gstmfaacenc.h
@ -0,0 +1,32 @@
+/* GStreamer
+ * Copyright (C) 2020 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_MF_AAC_ENC_H__
+#define __GST_MF_AAC_ENC_H__
+
+#include <gst/gst.h>
+
+G_BEGIN_DECLS
+
+/* Enumerates the Media Foundation AAC encoder MFTs and registers an element
+ * for each usable one in @plugin, using @rank as the base rank. */
+void gst_mf_aac_enc_plugin_init (GstPlugin * plugin,
+                                 guint rank);
+
+G_END_DECLS
+
+#endif /* __GST_MF_AAC_ENC_H__ */
--- a/sys/mediafoundation/gstmfaudioenc.cpp
+++ b/sys/mediafoundation/gstmfaudioenc.cpp
@ -0,0 +1,328 @@
+/* GStreamer
+ * Copyright (C) 2020 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <gst/gst.h>
+#include "gstmfaudioenc.h"
+#include <wrl.h>
+#include <string.h>
+
+using namespace Microsoft::WRL;
+
+GST_DEBUG_CATEGORY (gst_mf_audio_enc_debug);
+#define GST_CAT_DEFAULT gst_mf_audio_enc_debug
+
+#define gst_mf_audio_enc_parent_class parent_class
+G_DEFINE_ABSTRACT_TYPE_WITH_CODE (GstMFAudioEnc, gst_mf_audio_enc,
+    GST_TYPE_AUDIO_ENCODER,
+    GST_DEBUG_CATEGORY_INIT (gst_mf_audio_enc_debug, "mfaudioenc", 0,
+      "mfaudioenc"));
+
+static gboolean gst_mf_audio_enc_open (GstAudioEncoder * enc);
+static gboolean gst_mf_audio_enc_close (GstAudioEncoder * enc);
+static gboolean gst_mf_audio_enc_set_format (GstAudioEncoder * enc,
+    GstAudioInfo * info);
+static GstFlowReturn gst_mf_audio_enc_handle_frame (GstAudioEncoder * enc,
+    GstBuffer *buffer);
+static GstFlowReturn gst_mf_audio_enc_drain (GstAudioEncoder * enc);
+static void gst_mf_audio_enc_flush (GstAudioEncoder * enc);
+
+/* Class initializer: hooks the Media Foundation implementation into the
+ * GstAudioEncoder virtual methods */
+static void
+gst_mf_audio_enc_class_init (GstMFAudioEncClass * klass)
+{
+  GstAudioEncoderClass *encoder_class = GST_AUDIO_ENCODER_CLASS (klass);
+
+  /* MFT object creation and destruction */
+  encoder_class->open = GST_DEBUG_FUNCPTR (gst_mf_audio_enc_open);
+  encoder_class->close = GST_DEBUG_FUNCPTR (gst_mf_audio_enc_close);
+
+  /* negotiation and data flow */
+  encoder_class->set_format = GST_DEBUG_FUNCPTR (gst_mf_audio_enc_set_format);
+  encoder_class->handle_frame =
+      GST_DEBUG_FUNCPTR (gst_mf_audio_enc_handle_frame);
+  encoder_class->flush = GST_DEBUG_FUNCPTR (gst_mf_audio_enc_flush);
+}
+
+/* Instance initializer: marks the encoder as drainable */
+static void
+gst_mf_audio_enc_init (GstMFAudioEnc * self)
+{
+  gst_audio_encoder_set_drainable (GST_AUDIO_ENCODER (self), TRUE);
+}
+
+/* GstAudioEncoder::open implementation.
+ *
+ * Creates the GstMFTransform for the codec, enumeration flags and device
+ * index that the subclass stored in the class structure.
+ *
+ * Returns: TRUE if the transform object could be created */
+static gboolean
+gst_mf_audio_enc_open (GstAudioEncoder * enc)
+{
+  GstMFAudioEnc *self = GST_MF_AUDIO_ENC (enc);
+  GstMFAudioEncClass *klass = GST_MF_AUDIO_ENC_GET_CLASS (enc);
+  GstMFTransformEnumParams params = { 0, };
+  MFT_REGISTER_TYPE_INFO out_info;
+
+  out_info.guidMajorType = MFMediaType_Audio;
+  out_info.guidSubtype = klass->codec_id;
+
+  params.category = MFT_CATEGORY_AUDIO_ENCODER;
+  params.enum_flags = klass->enum_flags;
+  params.output_typeinfo = &out_info;
+  params.device_index = klass->device_index;
+
+  GST_DEBUG_OBJECT (self, "Create MFT with enum flags 0x%x, device index %d",
+      klass->enum_flags, klass->device_index);
+
+  self->transform = gst_mf_transform_new (&params);
+  if (!self->transform) {
+    GST_ERROR_OBJECT (self, "Cannot create MFT object");
+    return FALSE;
+  }
+
+  return TRUE;
+}
+
+/* GstAudioEncoder::close implementation: drops the transform object
+ * created in open. Always returns TRUE. */
+static gboolean
+gst_mf_audio_enc_close (GstAudioEncoder * enc)
+{
+  GstMFAudioEnc *self = GST_MF_AUDIO_ENC (enc);
+
+  gst_clear_object (&self->transform);
+
+  return TRUE;
+}
+
+/* GstAudioEncoder::set_format implementation.
+ *
+ * Drains pending data, opens the MFT and configures it with the output and
+ * input media types provided by the subclass for @info. It then lets the
+ * subclass set the source caps and sets up the frame size constraints and
+ * the timestamp bookkeeping.
+ *
+ * Returns: TRUE if the MFT was configured for @info, FALSE otherwise */
+static gboolean
+gst_mf_audio_enc_set_format (GstAudioEncoder * enc, GstAudioInfo * info)
+{
+  GstMFAudioEnc *self = GST_MF_AUDIO_ENC (enc);
+  GstMFAudioEncClass *klass = GST_MF_AUDIO_ENC_GET_CLASS (enc);
+  ComPtr<IMFMediaType> in_type;
+  ComPtr<IMFMediaType> out_type;
+
+  GST_DEBUG_OBJECT (self, "Set format");
+
+  /* Push out whatever is still pending for the previous format; the
+   * returned flow status is not checked here */
+  gst_mf_audio_enc_drain (enc);
+
+  if (!gst_mf_transform_open (self->transform)) {
+    GST_ERROR_OBJECT (self, "Failed to open MFT");
+    return FALSE;
+  }
+
+  /* The output type is configured before the input type */
+  g_assert (klass->get_output_type != NULL);
+  if (!klass->get_output_type (self, info, &out_type)) {
+    GST_ERROR_OBJECT (self, "subclass failed to set output type");
+    return FALSE;
+  }
+
+  gst_mf_dump_attributes (out_type.Get(), "Set output type", GST_LEVEL_DEBUG);
+
+  if (!gst_mf_transform_set_output_type (self->transform, out_type.Get ())) {
+    GST_ERROR_OBJECT (self, "Couldn't set output type");
+    return FALSE;
+  }
+
+  g_assert (klass->get_input_type != NULL);
+  if (!klass->get_input_type (self, info, &in_type)) {
+    GST_ERROR_OBJECT (self, "subclass didn't provide input type");
+    return FALSE;
+  }
+
+  gst_mf_dump_attributes (in_type.Get(), "Set input type", GST_LEVEL_DEBUG);
+
+  if (!gst_mf_transform_set_input_type (self->transform, in_type.Get ())) {
+    GST_ERROR_OBJECT (self, "Couldn't set input media type");
+    return FALSE;
+  }
+
+  /* Both types are set now, so the subclass can derive the source caps */
+  g_assert (klass->set_src_caps != NULL);
+  if (!klass->set_src_caps (self, info))
+    return FALSE;
+
+  /* Request exactly one frame of frame_samples samples per handle_frame */
+  g_assert (klass->frame_samples > 0);
+  gst_audio_encoder_set_frame_samples_min (enc, klass->frame_samples);
+  gst_audio_encoder_set_frame_samples_max (enc, klass->frame_samples);
+  gst_audio_encoder_set_frame_max (enc, 1);
+
+  /* mediafoundation encoder needs timestamp and duration */
+  self->sample_count = 0;
+  /* Duration of one frame, scaled by 10000000 per second (100ns units) */
+  self->sample_duration_in_mf = gst_util_uint64_scale (klass->frame_samples,
+      10000000, GST_AUDIO_INFO_RATE (info));
+
+  GST_DEBUG_OBJECT (self,
+      "Calculated sample duration %" GST_TIME_FORMAT,
+      GST_TIME_ARGS (self->sample_duration_in_mf * 100));
+
+  return TRUE;
+}
+
+/* Copies @buffer into a newly created IMFSample, stamps it with a
+ * timestamp derived from the running sample count and feeds it to the
+ * transform.
+ *
+ * Returns: TRUE if the sample was accepted by the transform */
+static gboolean
+gst_mf_audio_enc_process_input (GstMFAudioEnc * self, GstBuffer * buffer)
+{
+  HRESULT hr;
+  ComPtr<IMFSample> sample;
+  ComPtr<IMFMediaBuffer> media_buffer;
+  BYTE *data;
+  gboolean res = FALSE;
+  GstMapInfo info;
+  guint64 timestamp;
+
+  if (!gst_buffer_map (buffer, &info, GST_MAP_READ)) {
+    GST_ELEMENT_ERROR (self,
+        RESOURCE, READ, ("Couldn't map input buffer"), (NULL));
+    return FALSE;
+  }
+
+  GST_TRACE_OBJECT (self, "Process buffer %" GST_PTR_FORMAT, buffer);
+
+  /* Timestamps are generated from the number of samples fed so far */
+  timestamp = self->sample_count * self->sample_duration_in_mf;
+
+  hr = MFCreateSample (sample.GetAddressOf ());
+  if (!gst_mf_result (hr))
+    goto done;
+
+  hr = MFCreateMemoryBuffer (info.size, media_buffer.GetAddressOf ());
+  if (!gst_mf_result (hr))
+    goto done;
+
+  /* Copy the mapped GstBuffer data into the Media Foundation buffer */
+  hr = media_buffer->Lock (&data, NULL, NULL);
+  if (!gst_mf_result (hr))
+    goto done;
+
+  memcpy (data, info.data, info.size);
+  media_buffer->Unlock ();
+
+  hr = media_buffer->SetCurrentLength (info.size);
+  if (!gst_mf_result (hr))
+    goto done;
+
+  hr = sample->AddBuffer (media_buffer.Get ());
+  if (!gst_mf_result (hr))
+    goto done;
+
+  hr = sample->SetSampleTime (timestamp);
+  if (!gst_mf_result (hr))
+    goto done;
+
+  hr = sample->SetSampleDuration (self->sample_duration_in_mf);
+  if (!gst_mf_result (hr))
+    goto done;
+
+  if (!gst_mf_transform_process_input (self->transform, sample.Get ())) {
+    GST_ERROR_OBJECT (self, "Failed to process input");
+    goto done;
+  }
+
+  /* Only count samples that were actually accepted by the transform */
+  self->sample_count++;
+
+  res = TRUE;
+
+done:
+  /* The input buffer is unmapped on every path past the mapping above */
+  gst_buffer_unmap (buffer, &info);
+
+  return res;
+}
+
+/* Fetches one encoded sample from the transform, copies the content of its
+ * first media buffer into a GstBuffer and finishes it as one frame.
+ *
+ * Returns: the flow return of gst_mf_transform_get_output() when no sample
+ * was produced, GST_FLOW_ERROR when the sample could not be read, or the
+ * result of gst_audio_encoder_finish_frame() otherwise */
+static GstFlowReturn
+gst_mf_audio_enc_process_output (GstMFAudioEnc * self)
+{
+  GstMFAudioEncClass *klass = GST_MF_AUDIO_ENC_GET_CLASS (self);
+  ComPtr<IMFSample> out_sample;
+  ComPtr<IMFMediaBuffer> out_media_buffer;
+  GstFlowReturn flow;
+  GstBuffer *outbuf;
+  BYTE *payload;
+  DWORD payload_len;
+  HRESULT hr;
+
+  flow = gst_mf_transform_get_output (self->transform,
+      out_sample.GetAddressOf ());
+  if (flow != GST_FLOW_OK)
+    return flow;
+
+  hr = out_sample->GetBufferByIndex (0, out_media_buffer.GetAddressOf ());
+  if (!gst_mf_result (hr))
+    return GST_FLOW_ERROR;
+
+  hr = out_media_buffer->Lock (&payload, NULL, &payload_len);
+  if (!gst_mf_result (hr))
+    return GST_FLOW_ERROR;
+
+  /* Copy the encoded data out while the media buffer is locked */
+  outbuf = gst_audio_encoder_allocate_output_buffer (GST_AUDIO_ENCODER (self),
+      payload_len);
+  gst_buffer_fill (outbuf, 0, payload, payload_len);
+  out_media_buffer->Unlock ();
+
+  return gst_audio_encoder_finish_frame (GST_AUDIO_ENCODER (self), outbuf,
+      klass->frame_samples);
+}
+
+/* GstAudioEncoder::handle_frame implementation.
+ *
+ * A NULL @buffer requests draining. Otherwise the buffer is fed to the
+ * transform and every output sample that becomes available is pushed. */
+static GstFlowReturn
+gst_mf_audio_enc_handle_frame (GstAudioEncoder * enc,
+    GstBuffer *buffer)
+{
+  GstMFAudioEnc *self = GST_MF_AUDIO_ENC (enc);
+  GstFlowReturn flow;
+
+  if (buffer == NULL)
+    return gst_mf_audio_enc_drain (enc);
+
+  if (!gst_mf_audio_enc_process_input (self, buffer)) {
+    GST_ERROR_OBJECT (self, "Failed to process input");
+    return GST_FLOW_ERROR;
+  }
+
+  /* Keep pulling output until the transform stops returning OK */
+  for (;;) {
+    flow = gst_mf_audio_enc_process_output (self);
+    if (flow != GST_FLOW_OK)
+      break;
+  }
+
+  /* The transform asking for more input is the normal way to stop */
+  if (flow == GST_MF_TRANSFORM_FLOW_NEED_DATA)
+    return GST_FLOW_OK;
+
+  return flow;
+}
+
+/* Asks the transform to drain and pushes every output sample it still
+ * produces. Does nothing and returns GST_FLOW_OK when no transform exists. */
+static GstFlowReturn
+gst_mf_audio_enc_drain (GstAudioEncoder * enc)
+{
+  GstMFAudioEnc *self = GST_MF_AUDIO_ENC (enc);
+  GstFlowReturn flow = GST_FLOW_OK;
+
+  if (self->transform == NULL)
+    return GST_FLOW_OK;
+
+  gst_mf_transform_drain (self->transform);
+
+  /* Keep pulling output until the transform stops returning OK */
+  for (;;) {
+    flow = gst_mf_audio_enc_process_output (self);
+    if (flow != GST_FLOW_OK)
+      break;
+  }
+
+  /* The transform asking for more input is the normal way to stop */
+  if (flow == GST_MF_TRANSFORM_FLOW_NEED_DATA)
+    return GST_FLOW_OK;
+
+  return flow;
+}
+
+/* GstAudioEncoder::flush implementation: flushes the transform, if any */
+static void
+gst_mf_audio_enc_flush (GstAudioEncoder * enc)
+{
+  GstMFAudioEnc *self = GST_MF_AUDIO_ENC (enc);
+
+  if (self->transform != NULL)
+    gst_mf_transform_flush (self->transform);
+}
--- a/sys/mediafoundation/gstmfaudioenc.h
+++ b/sys/mediafoundation/gstmfaudioenc.h
@ -0,0 +1,74 @@
+/* GStreamer
+ * Copyright (C) 2020 Seungha Yang <seungha@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __GST_MF_AUDIO_ENC_H__
+#define __GST_MF_AUDIO_ENC_H__
+
+#include <gst/gst.h>
+#include <gst/audio/audio.h>
+#include "gstmfutils.h"
+#include "gstmftransform.h"
+
+G_BEGIN_DECLS
+
+/* GObject type, cast and type-check helper macros for GstMFAudioEnc.
+ * The *_CLASS variants take a class pointer (klass); the others take an
+ * instance pointer (obj). */
+#define GST_TYPE_MF_AUDIO_ENC           (gst_mf_audio_enc_get_type())
+#define GST_MF_AUDIO_ENC(obj)           (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_MF_AUDIO_ENC,GstMFAudioEnc))
+#define GST_MF_AUDIO_ENC_CLASS(klass)   (G_TYPE_CHECK_CLASS_CAST((klass), GST_TYPE_MF_AUDIO_ENC,GstMFAudioEncClass))
+#define GST_MF_AUDIO_ENC_GET_CLASS(obj) (G_TYPE_INSTANCE_GET_CLASS((obj), GST_TYPE_MF_AUDIO_ENC,GstMFAudioEncClass))
+#define GST_IS_MF_AUDIO_ENC(obj)        (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_MF_AUDIO_ENC))
+#define GST_IS_MF_AUDIO_ENC_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass), GST_TYPE_MF_AUDIO_ENC))
+
+typedef struct _GstMFAudioEnc GstMFAudioEnc;
+typedef struct _GstMFAudioEncClass GstMFAudioEncClass;
+
+struct _GstMFAudioEnc          /* instance structure of the Media Foundation audio encoder */
+{
+  GstAudioEncoder parent;
+
+  GstMFTransform *transform;    /* transform wrapper; drain and flush are no-ops while this is NULL */
+  guint64 sample_duration_in_mf;        /* NOTE(review): presumably accumulated duration in MF time units - confirm in gstmfaudioenc.cpp */
+  guint64 sample_count;         /* NOTE(review): presumably number of samples handled so far - confirm in gstmfaudioenc.cpp */
+};
+
+struct _GstMFAudioEncClass     /* class structure: per-class settings plus function-pointer hooks */
+{
+  GstAudioEncoderClass parent_class;
+
+  GUID codec_id;                /* NOTE(review): presumably identifies the codec of the wanted MFT - confirm */
+  guint32 enum_flags;           /* NOTE(review): presumably flags used when enumerating MFTs - confirm */
+  guint device_index;           /* NOTE(review): presumably index into the enumerated MFT list - confirm */
+  gint frame_samples;           /* passed as the sample count to gst_audio_encoder_finish_frame() for each output buffer */
+
+  gboolean (*get_output_type) (GstMFAudioEnc * mfenc,
+                               GstAudioInfo * info,
+                               IMFMediaType ** output_type);    /* NOTE(review): presumably builds the MF output type for @info - confirm */
+
+  gboolean (*get_input_type)  (GstMFAudioEnc * mfenc,
+                               GstAudioInfo * info,
+                               IMFMediaType ** input_type);     /* NOTE(review): presumably builds the MF input type for @info - confirm */
+
+  gboolean (*set_src_caps)    (GstMFAudioEnc * mfenc,
+                               GstAudioInfo * info);    /* NOTE(review): presumably sets the caps on the source pad - confirm */
+};
+
+GType gst_mf_audio_enc_get_type (void);        /* type accessor that GST_TYPE_MF_AUDIO_ENC expands to */
+
+G_END_DECLS
+
+#endif /* __GST_MF_AUDIO_ENC_H__ */
--- a/sys/mediafoundation/gstmftransform.cpp
+++ b/sys/mediafoundation/gstmftransform.cpp
@ -65,7 +65,7 @@ struct _GstMFTransform
  DWORD input_id;
  DWORD output_id;

-  gboolean need_start;
+  gboolean running;

  gint pending_need_input;
  gint pending_have_output;
@ -512,7 +512,7 @@ gst_mf_transform_process_input (GstMFTransform * object,
  if (!object->transform)
    return FALSE;

-  if (object->need_start) {
+  if (!object->running) {
    hr = object->transform->ProcessMessage (MFT_MESSAGE_NOTIFY_START_OF_STREAM,
        0);
    if (!gst_mf_result (hr)) {
@ -527,7 +527,7 @@ gst_mf_transform_process_input (GstMFTransform * object,
      return FALSE;
    }

-    object->need_start = FALSE;
+    object->running = TRUE;
  }

  gst_mf_transform_drain_all_events (object);
@ -601,14 +601,14 @@ gst_mf_transform_flush (GstMFTransform * object)
  g_return_val_if_fail (GST_IS_MF_TRANSFORM (object), FALSE);

  if (object->transform) {
-    if (!object->need_start)
+    if (object->running)
      object->transform->ProcessMessage (MFT_MESSAGE_COMMAND_FLUSH, 0);

    object->pending_have_output = 0;
    object->pending_need_input = 0;
  }

-  object->need_start = TRUE;
+  object->running = FALSE;

  while (!g_queue_is_empty (object->output_queue)) {
    IMFSample *sample = (IMFSample *) g_queue_pop_head (object->output_queue);
@ -628,7 +628,7 @@ gst_mf_transform_drain (GstMFTransform * object)
  if (!object->transform)
    return TRUE;

-  object->need_start = TRUE;
+  object->running = FALSE;
  object->transform->ProcessMessage (MFT_MESSAGE_COMMAND_DRAIN, 0);

  if (object->hardware) {
@ -946,6 +946,56 @@ gst_mf_transform_set_output_type (GstMFTransform * object,
  return TRUE;
 }

+/* Queries the media type currently configured on the transform's input
+ * stream via IMFTransform::GetInputCurrentType().
+ *
+ * @object: a GstMFTransform whose underlying transform has been opened
+ * @input_type: location that receives the current input media type
+ *
+ * Returns: TRUE on success; FALSE when an argument is invalid, the transform
+ * is not opened yet, or the Media Foundation call fails.
+ */
+gboolean
+gst_mf_transform_get_input_current_type (GstMFTransform * object,
+    IMFMediaType ** input_type)
+{
+  IMFTransform *mft;
+  HRESULT result;
+
+  g_return_val_if_fail (GST_IS_MF_TRANSFORM (object), FALSE);
+  g_return_val_if_fail (input_type != NULL, FALSE);
+
+  mft = object->transform;
+  if (!mft) {
+    GST_ERROR_OBJECT (object, "Should open first");
+    return FALSE;
+  }
+
+  result = mft->GetInputCurrentType (object->input_id, input_type);
+
+  return gst_mf_result (result) ? TRUE : FALSE;
+}
+
+/* Queries the media type currently configured on the transform's output
+ * stream via IMFTransform::GetOutputCurrentType().
+ *
+ * @object: a GstMFTransform whose underlying transform has been opened
+ * @output_type: location that receives the current output media type
+ *
+ * Returns: TRUE on success; FALSE when an argument is invalid, the transform
+ * is not opened yet, or the Media Foundation call fails.
+ */
+gboolean
+gst_mf_transform_get_output_current_type (GstMFTransform * object,
+    IMFMediaType ** output_type)
+{
+  IMFTransform *mft;
+  HRESULT result;
+
+  g_return_val_if_fail (GST_IS_MF_TRANSFORM (object), FALSE);
+  g_return_val_if_fail (output_type != NULL, FALSE);
+
+  mft = object->transform;
+  if (!mft) {
+    GST_ERROR_OBJECT (object, "Should open first");
+    return FALSE;
+  }
+
+  result = mft->GetOutputCurrentType (object->output_id, output_type);
+
+  return gst_mf_result (result) ? TRUE : FALSE;
+}
+
 GstMFTransform *
 gst_mf_transform_new (GstMFTransformEnumParams * params)
 {
--- a/sys/mediafoundation/gstmftransform.h
+++ b/sys/mediafoundation/gstmftransform.h
@ -76,6 +76,12 @@ gboolean        gst_mf_transform_set_input_type  (GstMFTransform * object,
 gboolean        gst_mf_transform_set_output_type (GstMFTransform * object,
                                                  IMFMediaType * output_type);

+gboolean        gst_mf_transform_get_input_current_type  (GstMFTransform * object,
+                                                          IMFMediaType ** input_type);
+
+gboolean        gst_mf_transform_get_output_current_type (GstMFTransform * object,
+                                                          IMFMediaType ** output_type);
+
 gboolean        gst_mf_transform_set_codec_api_uint32  (GstMFTransform * object,
                                                        const GUID * api,
                                                        guint32 value);
--- a/sys/mediafoundation/meson.build
+++ b/sys/mediafoundation/meson.build
@ -8,6 +8,8 @@ mf_sources = [
  'gstmfvideosrc.c',
  'gstmfsourceobject.c',
  'gstmfdevice.c',
+  'gstmfaudioenc.cpp',
+  'gstmfaacenc.cpp',
 ]

 mf_desktop_sources = [
@ -127,7 +129,7 @@ gstmediafoundation = library('gstmediafoundation',
  c_args : gst_plugins_bad_args + ['-DCOBJMACROS'],
  cpp_args : gst_plugins_bad_args,
  include_directories : [configinc],
-  dependencies : [gstbase_dep, gstvideo_dep, gstpbutils_dep] + mf_lib_deps,
+  dependencies : [gstbase_dep, gstvideo_dep, gstaudio_dep, gstpbutils_dep] + mf_lib_deps,
  install : true,
  install_dir : plugins_install_dir,
 )
--- a/sys/mediafoundation/plugin.c
+++ b/sys/mediafoundation/plugin.c
@ -32,6 +32,7 @@
 #include "gstmfutils.h"
 #include "gstmfh264enc.h"
 #include "gstmfh265enc.h"
+#include "gstmfaacenc.h"

 GST_DEBUG_CATEGORY (gst_mf_debug);
 GST_DEBUG_CATEGORY (gst_mf_utils_debug);
@ -71,6 +72,7 @@ plugin_init (GstPlugin * plugin)

  gst_mf_h264_enc_plugin_init (plugin, GST_RANK_SECONDARY);
  gst_mf_h265_enc_plugin_init (plugin, GST_RANK_SECONDARY);
+  gst_mf_aac_enc_plugin_init (plugin, GST_RANK_SECONDARY);

  return TRUE;
 }