gstreamer/subprojects/gst-plugins-good/sys/osxaudio/gstatenc.c

944 lines
32 KiB
C
Raw Normal View History

/*
* Copyright (C) 2024 Piotr Brzeziński <piotr@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
/**
* SECTION:element-atenc
* @title: atenc
*
* AudioToolbox based encoder.
* ## Example launch line
* |[
* gst-launch-1.0 -v audiotestsrc ! atenc ! mp4mux ! filesink location=test.m4a
* ]|
* Encodes audio from audiotestsrc to AAC and writes it to an MP4 (.m4a) file.
*
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstatenc.h"
/* GObject property identifiers used by set_property()/get_property() and
 * by the property installation in class_init(). PROP_0 is not installed
 * as a property. */
enum
{
PROP_0,
PROP_BITRATE,
PROP_RATE_CONTROL,
PROP_VBR_QUALITY,
};
/* Property defaults. A bitrate of 0 means "auto": set_format() only pushes
 * a bitrate to the encoder when the property is greater than zero. */
#define DEFAULT_BITRATE 0
#define DEFAULT_RATE_CONTROL GST_ATENC_RATE_CONTROL_CONSTANT
#define DEFAULT_VBR_QUALITY 65
/* MPEG descriptor tags looked for when digging the AudioSpecificConfig
 * out of the encoder's magic cookie. */
#define ES_DESCRIPTOR_TAG 0x03
#define DECODER_CONFIG_DESC_TAG 0x04
#define DECODER_SPECIFIC_INFO_TAG 0x05
/* Sample rates advertised on both pad templates (caps list syntax). */
#define SAMPLE_RATES " 8000, " \
"11025, " \
"12000, " \
"16000, " \
"22050, " \
"24000, " \
"32000, " \
"44100, " \
"48000 "
/* SAMPLE_RATES stops at 48000 on purpose: higher sample rates were failing
* when initializing the encoder. They are probably supported only in specific
* circumstances; it is hard to find documentation about that. */
/* *INDENT-OFF* */
/* Table of the AAC channel layouts this element can produce.
 *
 * Each entry holds, in order: the channel count, the AudioToolbox layout tag
 * for that configuration, and the GStreamer channel positions belonging to
 * it (the per-entry comments spell out the channel order). The table is
 * terminated by an entry with a channel count of 0; the loops in
 * gst_atenc_get_caps() and gst_atenc_get_output_layout_tag() rely on that
 * sentinel to stop iterating. */
static const GstATEncLayout aac_layouts[] = {
{
1, kAudioChannelLayoutTag_Mono, { GST_AUDIO_CHANNEL_POSITION_MONO }}, {
2, kAudioChannelLayoutTag_Stereo, {
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT }}, {
/* C L R */
3, kAudioChannelLayoutTag_AAC_3_0, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT }}, {
/* C L R Cs */
4, kAudioChannelLayoutTag_AAC_4_0, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_REAR_CENTER }}, {
/* C L R Ls Rs */
5, kAudioChannelLayoutTag_AAC_5_0, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT }}, {
/* C L R Ls Rs Lfe */
6, kAudioChannelLayoutTag_AAC_5_1, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
/* C L R Ls Rs Cs */
6, kAudioChannelLayoutTag_AAC_6_0, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
GST_AUDIO_CHANNEL_POSITION_REAR_CENTER }}, {
/* C L R Ls Rs Cs Lfe */
7, kAudioChannelLayoutTag_AAC_6_1, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
GST_AUDIO_CHANNEL_POSITION_REAR_CENTER,
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
/* C L R Ls Rs Rls Rrs */
7, kAudioChannelLayoutTag_AAC_7_0, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
GST_AUDIO_CHANNEL_POSITION_REAR_LEFT,
GST_AUDIO_CHANNEL_POSITION_REAR_RIGHT }}, {
/* C Lc Rc L R Ls Rs Lfe */
8, kAudioChannelLayoutTag_AAC_7_1, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT_OF_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT_OF_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
/* C L R Ls Rs Rls Rrs LFE */
8, kAudioChannelLayoutTag_AAC_7_1_B, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
GST_AUDIO_CHANNEL_POSITION_REAR_LEFT,
GST_AUDIO_CHANNEL_POSITION_REAR_RIGHT,
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
/* C L R Ls Rs LFE Vhl Vhr */
8, kAudioChannelLayoutTag_AAC_7_1_C, {
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
GST_AUDIO_CHANNEL_POSITION_LFE1,
GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_LEFT,
GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_RIGHT }}, {
/* Sentinel entry (0 channels): only used to stop iteration over the table */
0, kAudioChannelLayoutTag_Unknown, { 0 } }
};
/* *INDENT-ON* */
/* Sink pad: always present; accepts interleaved, native-endian signed 16-bit
 * raw audio at one of the SAMPLE_RATES, with 1 to 8 channels. */
static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("audio/x-raw, "
"format = (string) " GST_AUDIO_NE (S16) ", "
"layout = (string) interleaved, "
"rate = (int) { " SAMPLE_RATES " }, channels = (int) [ 1, 8 ]")
);
/* Source pad: always present; advertises framed, raw-stream-format MPEG-4
 * audio with the "lc" profile, at the same rates and channel counts as the
 * sink pad. */
static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("audio/mpeg, "
"mpegversion = (int) 4, "
"rate = (int) { " SAMPLE_RATES " }, "
"channels = (int) [ 1, 8 ], "
"stream-format = (string) raw, "
"profile = (string) lc, framed = (boolean) true")
);
/* File-local debug category; it is initialised in gst_atenc_class_init() and
 * used as the default category for the GST_* logging macros in this file. */
GST_DEBUG_CATEGORY_STATIC (gst_atenc_debug);
#define GST_CAT_DEFAULT gst_atenc_debug
/* GstATEnc derives from GstAudioEncoder. */
G_DEFINE_TYPE (GstATEnc, gst_atenc, GST_TYPE_AUDIO_ENCODER);
/* Registers the element under the name "atenc" with primary rank. */
GST_ELEMENT_REGISTER_DEFINE (atenc, "atenc", GST_RANK_PRIMARY, GST_TYPE_ATENC);
#define GST_ATENC_RATE_CONTROL (gst_atenc_rate_control_get_type ())
static GType
gst_atenc_rate_control_get_type (void)
{
static GType atenc_rate_control_type = 0;
static const GEnumValue types[] = {
{GST_ATENC_RATE_CONTROL_CONSTANT, "Constant bitrate", "cbr"},
{GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE, "Long-term-average bitrate",
"lta"},
{GST_ATENC_RATE_CONTROL_VARIABLE_CONSTRAINED,
"Constrained variable bitrate", "cvbr"},
{GST_ATENC_RATE_CONTROL_VARIABLE, "Variable bitrate", "vbr"},
{0, NULL, NULL}
};
if (!atenc_rate_control_type)
atenc_rate_control_type =
g_enum_register_static ("GstATEncRateControl", types);
return atenc_rate_control_type;
}
/* GObject set_property implementation: stores the incoming value in the
 * instance field that backs the given property ID. Unknown IDs trigger the
 * standard GObject invalid-property warning. */
static void
gst_atenc_set_property (GObject * object, guint prop_id,
    const GValue * value, GParamSpec * pspec)
{
  GstATEnc *atenc = GST_ATENC (object);

  if (prop_id == PROP_BITRATE) {
    atenc->bitrate = g_value_get_uint (value);
  } else if (prop_id == PROP_RATE_CONTROL) {
    atenc->rate_control = g_value_get_enum (value);
  } else if (prop_id == PROP_VBR_QUALITY) {
    atenc->vbr_quality = g_value_get_uint (value);
  } else {
    G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
  }
}
/* GObject get_property implementation: copies the instance field that backs
 * the given property ID into @value. Unknown IDs trigger the standard
 * GObject invalid-property warning. */
static void
gst_atenc_get_property (GObject * object, guint prop_id,
    GValue * value, GParamSpec * pspec)
{
  GstATEnc *atenc = GST_ATENC (object);

  if (prop_id == PROP_BITRATE) {
    g_value_set_uint (value, atenc->bitrate);
  } else if (prop_id == PROP_RATE_CONTROL) {
    g_value_set_enum (value, atenc->rate_control);
  } else if (prop_id == PROP_VBR_QUALITY) {
    g_value_set_uint (value, atenc->vbr_quality);
  } else {
    G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
  }
}
/* GstAudioEncoder::start implementation.
 *
 * Creates the queue that carries input buffers from handle_frame() to the
 * AudioToolbox input callback. Entries still queued when the queue is
 * cleared are released with gst_buffer_unref(). Always returns TRUE. */
static gboolean
gst_atenc_start (GstAudioEncoder * enc)
{
  GstATEnc *atenc = GST_ATENC (enc);

  GST_DEBUG_OBJECT (atenc, "Starting encoder");

  atenc->input_queue = gst_queue_array_new (0);
  gst_queue_array_set_clear_func (atenc->input_queue,
      (GDestroyNotify) gst_buffer_unref);

  return TRUE;
}
/* GstAudioEncoder::flush implementation; also called from stop().
 *
 * Resets the AudioToolbox converter and drops any input buffers still queued.
 * The converter only exists once set_format() has succeeded, and stop() may
 * run before that, so both the converter and the queue are checked before
 * use instead of being passed on as NULL. */
static void
gst_atenc_flush (GstAudioEncoder * enc)
{
  GstATEnc *self = GST_ATENC (enc);

  GST_DEBUG_OBJECT (self, "Flushing encoder");

  if (self->converter)
    AudioConverterReset (self->converter);

  if (self->input_queue)
    gst_queue_array_clear (self->input_queue);
}
/* GstAudioEncoder::stop implementation.
 *
 * Flushes pending state, disposes of the converter (if any), frees the input
 * queue and releases the input buffer that was last handed to AudioToolbox
 * (if one is still mapped). Always returns TRUE. */
static gboolean
gst_atenc_stop (GstAudioEncoder * enc)
{
  GstATEnc *atenc = GST_ATENC (enc);

  GST_DEBUG_OBJECT (atenc, "Stopping encoder");

  gst_atenc_flush (enc);

  if (atenc->converter != NULL) {
    AudioConverterDispose (atenc->converter);
    atenc->converter = NULL;
  }

  gst_queue_array_free (atenc->input_queue);
  atenc->input_queue = NULL;

  if (atenc->used_buffer != NULL) {
    /* Unmap first, then drop the extra ref taken when the buffer was mapped
     * in the input callback, and finally free the heap-allocated wrapper. */
    gst_audio_buffer_unmap (atenc->used_buffer);
    gst_buffer_unref (atenc->used_buffer->buffer);
    g_free (atenc->used_buffer);
    atenc->used_buffer = NULL;
  }

  return TRUE;
}
/* GstAudioEncoder::getcaps implementation.
 *
 * Builds the sink caps by taking one copy of the sink pad template caps per
 * entry of aac_layouts and pinning it to that entry's channel count (plus
 * its channel mask, for anything but mono). The combined caps are then
 * passed through gst_audio_encoder_proxy_getcaps() together with @filter.
 *
 * Returns the caps produced by the proxy call. */
static GstCaps *
gst_atenc_get_caps (GstAudioEncoder * enc, GstCaps * filter)
{
  GstCaps *all_caps = gst_caps_new_empty ();
  GstCaps *result;
  guint64 channel_mask;
  guint idx;

  /* The table ends with a zero-channel sentinel entry */
  for (idx = 0; aac_layouts[idx].channels; idx++) {
    const GstATEncLayout *entry = &aac_layouts[idx];
    GstCaps *entry_caps =
        gst_caps_make_writable (gst_pad_get_pad_template_caps
        (GST_AUDIO_ENCODER_SINK_PAD (enc)));

    if (entry->channels != 1) {
      gst_audio_channel_positions_to_mask (entry->positions, entry->channels,
          FALSE, &channel_mask);
      gst_caps_set_simple (entry_caps, "channels", G_TYPE_INT,
          entry->channels, "channel-mask", GST_TYPE_BITMASK, channel_mask,
          NULL);
    } else {
      /* Mono: no channel-mask field is set */
      gst_caps_set_simple (entry_caps, "channels", G_TYPE_INT,
          entry->channels, NULL);
    }

    gst_caps_append (all_caps, entry_caps);
  }

  result = gst_audio_encoder_proxy_getcaps (enc, all_caps, filter);
  gst_caps_unref (all_caps);

  return result;
}
/* Input data callback passed to AudioConverterFillComplexBuffer().
 *
 * @converter:      converter requesting data (unused here)
 * @packets_amount: in: number of input packets (samples) AT would like;
 *                  out: number of samples actually provided
 * @buffers:        buffer list to point at the provided input data
 * @desc:           packet descriptions (unused here)
 * @user_data:      the GstATEnc instance
 *
 * Returns noErr when data was provided or when the input is at EOS,
 * 1 when no data is available yet but more is expected, and
 * kAudioConverterErr_UnspecifiedError when the input buffer cannot be mapped.
 */
static OSStatus
gst_atenc_fill_buffer (AudioConverterRef converter, UInt32 * packets_amount,
    AudioBufferList * buffers, AudioStreamPacketDescription ** desc,
    void *user_data)
{
  GstATEnc *self = GST_ATENC (user_data);
  GstBuffer *buf;
  GstAudioBuffer *audio_buf;
  GstAudioInfo *audio_info;
  UInt32 wanted_samples = *packets_amount;

  /* We can now safely clean up the buffer that was previously passed to AT */
  if (self->used_buffer) {
    gst_audio_buffer_unmap (self->used_buffer);
    gst_buffer_unref (self->used_buffer->buffer);
    g_free (self->used_buffer);
    self->used_buffer = NULL;
  }

  /* See https://developer.apple.com/library/archive/qa/qa1317/_index.html
   * packets_amount indicates how much data is expected to be filled in.
   *
   * The way this is set up, we tell the base class how many samples AT will expect,
   * and it will provide us with that much. Only exception is at the end of stream,
   * where there might not be enough data. Thankfully, if we signal EOS, AT will encode
   * whatever it got as input, without needing to silence-pad to the expected amount.
   *
   * In case of less data than packets_amount => set that to the actual value and return noErr
   * No data currently available, but more is expected => packets_amount=0 and return 1
   * No data available and input got EOS => packets_amount=0 and return noErr
   */
  buf = gst_queue_array_pop_head (self->input_queue);
  if (!buf) {
    *packets_amount = 0;

    if (self->input_eos) {
      GST_DEBUG_OBJECT (self, "No more input data, returning noErr");
      return noErr;
    } else {
      GST_LOG_OBJECT (self, "No input buffer yet, waiting for more data");
      return 1;
    }
  }

  /* We can only unmap the audio_buffer in the next callback, but in the meantime
   * the base class can invalidate the underlying buffer. Ref it manually to ensure
   * it lives long enough. */
  gst_buffer_ref (buf);

  audio_info = gst_audio_encoder_get_audio_info (GST_AUDIO_ENCODER (self));
  audio_buf = g_malloc0 (sizeof (GstAudioBuffer));

  if (!gst_audio_buffer_map (audio_buf, audio_info, buf, GST_MAP_READ)) {
    /* Never hand an unmapped buffer to AT: undo the ref taken above, provide
     * no data and make the converter call fail. */
    GST_ERROR_OBJECT (self, "Failed to map input buffer");
    g_free (audio_buf);
    gst_buffer_unref (buf);
    *packets_amount = 0;
    return kAudioConverterErr_UnspecifiedError;
  }

  /* Pushing this as a pointer instead of using the _struct() variants
   * because GstAudioBuffer contains self-references, so we'd get dangling pointers otherwise. */
  self->used_buffer = audio_buf;

  buffers->mNumberBuffers = 1;
  buffers->mBuffers[0].mNumberChannels = GST_AUDIO_INFO_CHANNELS (audio_info);
  buffers->mBuffers[0].mDataByteSize = GST_AUDIO_BUFFER_PLANE_SIZE (audio_buf);
  buffers->mBuffers[0].mData = GST_AUDIO_BUFFER_PLANE_DATA (audio_buf, 0);
  *packets_amount = audio_buf->n_samples;

  GST_LOG_OBJECT (self, "Wanted %d packets, filled %d", wanted_samples,
      *packets_amount);

  return noErr;
}
/* GstAudioEncoder::handle_frame implementation.
 *
 * @enc:    the encoder
 * @buffer: input samples, or NULL when the encoder should be drained
 *
 * Queues @buffer for the AudioToolbox input callback, then asks the converter
 * for one output packet. If a packet was produced, it is handed to the base
 * class via gst_audio_encoder_finish_frame().
 *
 * Returns GST_FLOW_OK when no packet was produced (more data needed or input
 * at EOS), the result of gst_audio_encoder_finish_frame() when a packet was
 * produced, or GST_FLOW_ERROR on allocation, mapping or conversion failure.
 * The output buffer is unmapped and released on every early-exit path.
 */
static GstFlowReturn
gst_atenc_handle_frame (GstAudioEncoder * enc, GstBuffer * buffer)
{
  GstATEnc *self = GST_ATENC (enc);
  OSStatus status;
  GstBuffer *outbuf;
  GstFlowReturn ret;
  GstMapInfo map_info;
  GstAudioInfo *audio_info;
  AudioBufferList out_bufs = { 0 };
  AudioStreamPacketDescription out_desc = { 0 };
  UInt32 out_packets;

  if (!buffer) {
    self->input_eos = TRUE;
    GST_DEBUG_OBJECT (self, "No input buffer, draining encoder");
  } else {
    self->input_eos = FALSE;
    /* NOTE(review): no reference is taken here, while the queue's clear
     * function unrefs its entries; confirm ownership for buffers that are
     * still queued when the queue is cleared. */
    gst_queue_array_push_tail (self->input_queue, buffer);
    GST_LOG_OBJECT (self, "Pushed buffer to queue");
  }

  outbuf =
      gst_audio_encoder_allocate_output_buffer (enc,
      self->max_output_buffer_size);
  if (!outbuf) {
    GST_ERROR_OBJECT (self, "Failed to allocate output buffer");
    return GST_FLOW_ERROR;
  }

  if (!gst_buffer_map (outbuf, &map_info, GST_MAP_WRITE)) {
    GST_ERROR_OBJECT (self, "Failed to map output buffer");
    gst_buffer_unref (outbuf);
    return GST_FLOW_ERROR;
  }

  audio_info = gst_audio_encoder_get_audio_info (enc);

  out_bufs.mNumberBuffers = 1;
  out_bufs.mBuffers[0].mNumberChannels = GST_AUDIO_INFO_CHANNELS (audio_info);
  out_bufs.mBuffers[0].mDataByteSize = self->max_output_buffer_size;
  out_bufs.mBuffers[0].mData = map_info.data;
  out_packets = 1;

  status =
      AudioConverterFillComplexBuffer (self->converter, gst_atenc_fill_buffer,
      self, &out_packets, &out_bufs, &out_desc);

  /* The converter is done writing, so the mapping is no longer needed on
   * any of the paths below. */
  gst_buffer_unmap (outbuf, &map_info);

  /* gst_atenc_fill_buffer will return 1 when it doesn't have enough data yet */
  if (status != noErr && status != 1) {
    GST_ERROR_OBJECT (self, "Failed to fill buffer: %d", status);
    gst_buffer_unref (outbuf);
    return GST_FLOW_ERROR;
  }

  if (out_packets == 0) {
    GST_LOG_OBJECT (self, "No packets produced, more data needed or input EOS");
    gst_buffer_unref (outbuf);
    return GST_FLOW_OK;
  }

  /* On exit, mDataByteSize is set to the number of bytes written. */
  GST_LOG_OBJECT (self, "Output buffer size: %d", out_desc.mDataByteSize);
  g_assert (out_desc.mDataByteSize <= self->max_output_buffer_size);
  gst_buffer_set_size (outbuf, out_desc.mDataByteSize);

  ret = gst_audio_encoder_finish_frame (enc, outbuf, self->n_output_samples);

  return ret;
}
/* Describes the input channel layout to AudioToolbox using per-channel
 * descriptions.
 *
 * @info:   negotiated input audio info
 * @layout: layout to fill in; the code writes one channel description per
 *          input channel, so the caller must have allocated room for them
 *
 * Every GStreamer channel position of @info is converted to its CoreAudio
 * channel label. */
static void
gst_atenc_fill_input_layout (GstAudioInfo * info, AudioChannelLayout * layout)
{
  const GstAudioChannelPosition *positions =
      &GST_AUDIO_INFO_POSITION (info, 0);
  int ch = 0;

  layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions;
  layout->mNumberChannelDescriptions = GST_AUDIO_INFO_CHANNELS (info);

  while (ch < GST_AUDIO_INFO_CHANNELS (info)) {
    layout->mChannelDescriptions[ch].mChannelLabel =
        gst_audio_channel_position_to_core_audio (positions[ch], ch);
    ch++;
  }
}
/* Looks up the AAC layout tag matching the input channel configuration.
 *
 * @self: the encoder (not used by the lookup)
 * @info: negotiated input audio info
 *
 * An entry of aac_layouts matches when it has the same channel count and the
 * same channel mask as the input. Channel order is not compared: the input
 * side is configured with per-channel descriptions, so AT reorders
 * internally.
 *
 * Returns the tag of the first matching entry, or
 * kAudioChannelLayoutTag_Unknown when nothing matches. */
static AudioChannelLayoutTag
gst_atenc_get_output_layout_tag (GstATEnc * self, GstAudioInfo * info)
{
  gint n_in_channels = GST_AUDIO_INFO_CHANNELS (info);
  guint64 in_mask;
  guint idx;

  gst_audio_channel_positions_to_mask (&GST_AUDIO_INFO_POSITION (info, 0),
      n_in_channels, FALSE, &in_mask);

  /* The table ends with a zero-channel sentinel entry */
  for (idx = 0; aac_layouts[idx].channels; idx++) {
    const GstATEncLayout *candidate = &aac_layouts[idx];
    guint64 candidate_mask;

    if (candidate->channels == n_in_channels) {
      gst_audio_channel_positions_to_mask (candidate->positions,
          candidate->channels, FALSE, &candidate_mask);

      if (candidate_mask == in_mask)
        return candidate->aac_tag;
    }
  }

  return kAudioChannelLayoutTag_Unknown;
}
/* Reads the header of one MPEG descriptor (tag + variable-length size).
 *
 * @br:  byte reader positioned at the start of a descriptor
 * @tag: receives the descriptor tag
 * @len: (nullable) receives the decoded size of the descriptor payload
 *
 * Returns TRUE when a complete header was read. Returns FALSE when the data
 * ends inside the header or when the size field is longer than 4 bytes,
 * instead of reporting a partially decoded size as valid.
 */
static bool
_parse_descriptor (GstByteReader * br, guint8 * tag, gint * len)
{
  gint size_of_instance = 0;
  guint8 size_byte;
  guint n_size_bytes = 0;

  /* Descriptors are variable size, parse it according
   * to the formula in sec. 14.3.3 of ISO/IEC 14496-1.
   * First 8 bits is the tag. */
  if (!gst_byte_reader_get_uint8 (br, tag))
    return FALSE;

  /* Following is one or more size_byte, in which the top bit tells us if we
   * should parse further, and the remaining 7 bits are the actual (portion
   * of the) size. The size field is at most 4 bytes long (28 bits of size),
   * which also keeps the shift below from overflowing a gint. */
  do {
    if (n_size_bytes == 4)
      return FALSE;

    if (!gst_byte_reader_get_uint8 (br, &size_byte))
      return FALSE;

    n_size_bytes++;
    size_of_instance = (size_of_instance << 7) | (size_byte & 0x7f);
  } while (size_byte & 0x80);

  if (len)
    *len = size_of_instance;

  return TRUE;
}
/* Extracts the AudioSpecificConfig from an AudioToolbox magic cookie.
 *
 * @cookie_buf:  cookie data
 * @cookie_size: size of @cookie_buf in bytes
 * @asc:         receives a newly allocated copy of the AudioSpecificConfig
 *               (to be released with g_free()), or NULL if copying it failed
 * @asc_size:    receives the size of *@asc in bytes
 *
 * Cookie data is a MPEG descriptor structure (ISO/IEC 14496-1). The function
 * walks ES_Descriptor -> DecoderConfigDescriptor -> DecoderSpecificInfo and
 * copies the payload of the latter. When no DecoderSpecificInfo is found,
 * @asc and @asc_size are left untouched.
 */
static void
gst_atenc_extract_audio_specific_config (guint8 * cookie_buf, guint cookie_size,
    guint8 ** asc, guint * asc_size)
{
  /* Stack-allocated reader: nothing to free on any exit path */
  GstByteReader br;
  gint len;
  guint8 tag, flags, flag_skip;

  gst_byte_reader_init (&br, cookie_buf, cookie_size);

  while (gst_byte_reader_get_remaining (&br) > 0) {
    if (!_parse_descriptor (&br, &tag, NULL))
      break;

    if (tag == ES_DESCRIPTOR_TAG) {
      /* First, find the ES_Descriptor and parse flags that tell us how many bits to skip */
      if (!gst_byte_reader_skip (&br, 2))
        break;
      if (!gst_byte_reader_get_uint8 (&br, &flags))
        break;

      if (flags & 0x80) {
        if (!gst_byte_reader_skip (&br, 2))
          break;
      }

      if (flags & 0x40) {
        /* A length byte followed by that many bytes to skip */
        if (!gst_byte_reader_get_uint8 (&br, &flag_skip))
          break;
        if (!gst_byte_reader_skip (&br, flag_skip))
          break;
      }

      if (flags & 0x20) {
        if (!gst_byte_reader_skip (&br, 2))
          break;
      }
    } else if (tag == DECODER_CONFIG_DESC_TAG) {
      /* Then we get the DecoderConfigDescriptor and skip its first 13 bytes to get to DecoderSpecificInfo */
      if (!gst_byte_reader_skip (&br, 13))
        break;
      if (!_parse_descriptor (&br, &tag, &len))
        break;

      /* DecoderSpecificInfo is the AudioSpecificConfig in our case */
      if (tag == DECODER_SPECIFIC_INFO_TAG) {
        /* gst_byte_reader_dup_data() allocates the copy itself, so no
         * buffer is allocated beforehand. */
        if (gst_byte_reader_dup_data (&br, (guint) len, asc)) {
          *asc_size = (guint) len;
        } else {
          *asc = NULL;
          *asc_size = 0;
        }
        break;
      }
    }
  }
}
/* GstAudioEncoder::set_format implementation.
 *
 * @enc:  the encoder
 * @info: negotiated input audio format
 *
 * Drains and disposes of a previously created converter (if any), then:
 *  1. creates a new PCM -> MPEG-4 AAC converter for @info,
 *  2. sets the input channel layout (per-channel descriptions) and the output
 *     channel layout (predefined AAC tag),
 *  3. applies rate control mode, VBR quality and bitrate from the properties
 *     (the bitrate is adjusted to what the encoder reports as applicable),
 *  4. queries samples per packet and maximum packet size, and configures the
 *     base class accordingly,
 *  5. extracts the AudioSpecificConfig from the magic cookie and sets the
 *     output caps with it as codec_data.
 *
 * Returns TRUE on success, FALSE if any of the steps fails.
 */
static gboolean
gst_atenc_set_format (GstAudioEncoder * enc, GstAudioInfo * info)
{
  GstATEnc *self = GST_ATENC (enc);
  AudioStreamBasicDescription input_desc = { 0 };
  AudioStreamBasicDescription output_desc = { 0 };
  AudioChannelLayout *layout = NULL;
  AudioChannelLayoutTag output_layout_tag;
  GstCaps *src_caps;
  OSStatus status;
  gboolean ret;
  UInt32 prop_size, max_output_size;
  guint8 *cookie_data = NULL;
  guint8 *audio_config = NULL;
  guint32 audio_config_size = 0;
  GstBuffer *asc_buf;

  if (self->converter) {
    /* Drain any leftover data from encoder */
    gst_atenc_handle_frame (enc, NULL);
    AudioConverterDispose (self->converter);
    self->converter = NULL;
  }

  input_desc.mSampleRate = GST_AUDIO_INFO_RATE (info);
  input_desc.mFormatID = kAudioFormatLinearPCM;
  input_desc.mFormatFlags =
      kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
  input_desc.mFramesPerPacket = 1;
  input_desc.mBytesPerFrame = input_desc.mBytesPerPacket =
      GST_AUDIO_INFO_BPF (info);
  input_desc.mChannelsPerFrame = GST_AUDIO_INFO_CHANNELS (info);
  input_desc.mBitsPerChannel = GST_AUDIO_INFO_DEPTH (info);

  /* HE-AAC v1/v2 and LD to be added later.
   * For LD, AudioSpecificConfig parsing fails completely, might be due to faulty MPEG descriptor parsing.
   * For HE-AAC, channel configurations need testing (also sometimes fail to parse). */
  output_desc.mFormatID = kAudioFormatMPEG4AAC;
  output_desc.mSampleRate = GST_AUDIO_INFO_RATE (info);
  output_desc.mChannelsPerFrame = GST_AUDIO_INFO_CHANNELS (info);

  status = AudioConverterNew (&input_desc, &output_desc, &self->converter);
  if (status != noErr) {
    GST_ERROR_OBJECT (self, "Failed to create audio converter: %d", status);
    return FALSE;
  }

  /* Using the encoder-provided size results in kAudioCodecBadPropertySizeError, so let's calculate it manually... */
  prop_size =
      sizeof (AudioChannelLayout) +
      sizeof (AudioChannelDescription) * GST_AUDIO_INFO_CHANNELS (info);
  layout = g_malloc0 (prop_size);

  /* For input, AT expects per-channel descriptions to be used */
  gst_atenc_fill_input_layout (info, layout);
  status =
      AudioConverterSetProperty (self->converter,
      kAudioConverterInputChannelLayout, prop_size, layout);
  if (status != noErr) {
    GST_ERROR_OBJECT (self, "Failed to set input channel layout: %d", status);
    g_free (layout);
    return FALSE;
  }

  /* For output, instead of channel descriptions, we use an AAC tag indicating one of the predefined layouts */
  output_layout_tag = gst_atenc_get_output_layout_tag (self, info);
  if (output_layout_tag == kAudioChannelLayoutTag_Unknown) {
    GST_ERROR_OBJECT (self,
        "Failed to find a matching output channel layout tag");
    g_free (layout);
    return FALSE;
  }

  layout->mChannelLayoutTag = output_layout_tag;
  layout->mNumberChannelDescriptions = 0;
  status =
      AudioConverterSetProperty (self->converter,
      kAudioConverterOutputChannelLayout, prop_size, layout);
  g_free (layout);
  if (status != noErr) {
    GST_ERROR_OBJECT (self, "Failed to set output channel layout: %d", status);
    return FALSE;
  }

  /* TODO: Check if this works on iOS */
  status =
      AudioConverterSetProperty (self->converter,
      kAudioCodecPropertyBitRateControlMode, sizeof (UInt32),
      &self->rate_control);
  if (status != noErr) {
    GST_ERROR_OBJECT (self, "Failed to set bitrate control mode: %d", status);
    return FALSE;
  }

  if (self->rate_control == GST_ATENC_RATE_CONTROL_VARIABLE) {
    status =
        AudioConverterSetProperty (self->converter,
        kAudioCodecPropertySoundQualityForVBR, sizeof (UInt32),
        &self->vbr_quality);
    if (status != noErr) {
      GST_ERROR_OBJECT (self, "Failed to set VBR quality: %d", status);
      return FALSE;
    }
  }

  if (self->bitrate > 0
      && (self->rate_control == GST_ATENC_RATE_CONTROL_CONSTANT
          || self->rate_control == GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE)) {
    /* Query the encoder for possible bitrate values and adjust if needed */
    AudioValueRange *bitrate_ranges;
    /* Defaults to the requested value, so it is always defined even when the
     * encoder reports no ranges at all */
    UInt32 actual_bitrate = self->bitrate;
    guint n_ranges;

    status =
        AudioConverterGetPropertyInfo (self->converter,
        kAudioConverterApplicableEncodeBitRates, &prop_size, NULL);
    if (status != noErr) {
      GST_ERROR_OBJECT (self, "Failed to get possible bitrates size: %d",
          status);
      return FALSE;
    }

    bitrate_ranges = g_malloc (prop_size);
    status =
        AudioConverterGetProperty (self->converter,
        kAudioConverterApplicableEncodeBitRates, &prop_size, bitrate_ranges);
    if (status != noErr) {
      GST_ERROR_OBJECT (self, "Failed to get possible bitrates: %d", status);
      g_free (bitrate_ranges);
      return FALSE;
    }

    n_ranges = prop_size / sizeof (AudioValueRange);

    GST_LOG_OBJECT (self, "Allowed bitrate ranges:");
    for (guint i = 0; i < n_ranges; i++) {
      AudioValueRange *range = &bitrate_ranges[i];

      GST_LOG_OBJECT (self, "%d: %f - %f",
          (gint) (i + 1), range->mMinimum, range->mMaximum);
    }

    /* Returned ranges are ordered from lowest to highest values */
    for (guint i = 0; i < n_ranges; i++) {
      AudioValueRange *range = &bitrate_ranges[i];

      if (self->bitrate < range->mMinimum) {
        /* Requested value lies below this range: use its lower bound */
        actual_bitrate = range->mMinimum;
        break;
      }

      if (self->bitrate <= range->mMaximum) {
        /* Requested value lies within this range. Often the min/max values
         * are identical, so not that much of a range... */
        actual_bitrate = self->bitrate;
        break;
      }

      /* Requested value lies above this range.
       * We might find higher values still, so no break */
      actual_bitrate = range->mMaximum;
    }

    /* The ranges are not needed past this point */
    g_free (bitrate_ranges);

    if (actual_bitrate != self->bitrate) {
      GST_WARNING_OBJECT (self,
          "Requested bitrate %d not in the allowed range, using %d",
          self->bitrate, actual_bitrate);
      self->bitrate = actual_bitrate;
    }

    /* TODO: This could be changed at any time instead of just in set_format,
     * but from initial testing, changing the bitrate when encoding introduces
     * a very short pause in encoded sound. Needs investigation. */
    status =
        AudioConverterSetProperty (self->converter,
        kAudioConverterEncodeBitRate, sizeof (UInt32), &actual_bitrate);
    if (status != noErr) {
      GST_ERROR_OBJECT (self, "Failed to set bitrate: %d", status);
      return FALSE;
    }
  }

  /* After creation, encoder fills input/output desc with more details */
  prop_size = sizeof (output_desc);
  status =
      AudioConverterGetProperty (self->converter,
      kAudioConverterCurrentOutputStreamDescription, &prop_size, &output_desc);
  if (status != noErr) {
    GST_ERROR_OBJECT (self, "Failed to get output format: %d", status);
    return FALSE;
  }

  self->n_output_samples = output_desc.mFramesPerPacket;
  GST_DEBUG_OBJECT (self, "samples per output packet: %d",
      self->n_output_samples);

  /* This isn't always set, so we might need to query manually */
  max_output_size = output_desc.mBytesPerPacket;
  if (max_output_size == 0) {
    prop_size = sizeof (max_output_size);
    status =
        AudioConverterGetProperty (self->converter,
        kAudioConverterPropertyMaximumOutputPacketSize, &prop_size,
        &max_output_size);
    if (status != noErr) {
      GST_ERROR_OBJECT (self, "Failed to get maximum output packet size: %d",
          status);
      return FALSE;
    }
  }

  self->max_output_buffer_size = max_output_size;
  GST_DEBUG_OBJECT (self, "maximum output buffer size: %d",
      self->max_output_buffer_size);

  /* For AAC, AT usually asks for 1024 samples per packet, base class needs to know */
  gst_audio_encoder_set_frame_max (enc, 1);
  gst_audio_encoder_set_frame_samples_min (enc, self->n_output_samples);
  gst_audio_encoder_set_frame_samples_max (enc, self->n_output_samples);
  gst_audio_encoder_set_drainable (enc, TRUE);

  /* FIXME: Handle lookahead according to kAudioConverterPrimeInfo.leadingFrames.
   * When passed directly to gst_audio_encoder_set_lookahead, causes
   * an audible skip in audio, and muxers such as mp4mux error out.
   * To be investigated. */

  status =
      AudioConverterGetPropertyInfo (self->converter,
      kAudioConverterCompressionMagicCookie, &prop_size, NULL);
  if (status != noErr) {
    GST_ERROR_OBJECT (self, "Failed to get magic cookie size: %d", status);
    return FALSE;
  }

  cookie_data = g_malloc (prop_size);
  status =
      AudioConverterGetProperty (self->converter,
      kAudioConverterCompressionMagicCookie, &prop_size, cookie_data);
  if (status != noErr) {
    GST_ERROR_OBJECT (self, "Failed to get magic cookie: %d", status);
    g_free (cookie_data);
    return FALSE;
  }

  /* Cookie contains a bunch of descriptors, gotta dig a bit to get the AudioSpecificConfig */
  gst_atenc_extract_audio_specific_config (cookie_data, prop_size,
      &audio_config, &audio_config_size);
  if (!audio_config) {
    GST_ERROR_OBJECT (self, "Failed to extract AudioSpecificConfig");
    g_free (cookie_data);
    return FALSE;
  }

  /* asc_buf takes ownership of audio_config; the pointer stays valid for the
   * level/profile call below because asc_buf is only unreffed afterwards */
  asc_buf = gst_buffer_new_wrapped (audio_config, audio_config_size);
  src_caps = gst_caps_new_simple ("audio/mpeg",
      "mpegversion", G_TYPE_INT, 4,
      "rate", G_TYPE_INT, GST_AUDIO_INFO_RATE (info),
      "channels", G_TYPE_INT, GST_AUDIO_INFO_CHANNELS (info),
      "stream-format", G_TYPE_STRING, "raw",
      "framed", G_TYPE_BOOLEAN, TRUE,
      "codec_data", GST_TYPE_BUFFER, asc_buf, NULL);
  gst_codec_utils_aac_caps_set_level_and_profile (src_caps, audio_config,
      audio_config_size);
  gst_buffer_unref (asc_buf);
  g_free (cookie_data);

  ret = gst_audio_encoder_set_output_format (enc, src_caps);
  GST_DEBUG ("output caps: %" GST_PTR_FORMAT, src_caps);
  gst_caps_unref (src_caps);

  return ret;
}
/* Instance initialiser: puts the encoder state into its idle condition and
 * loads the property defaults. */
static void
gst_atenc_init (GstATEnc * self)
{
  /* No input buffer is mapped yet and no end-of-stream has been seen */
  self->used_buffer = NULL;
  self->input_eos = FALSE;

  /* Property defaults, matching the values announced in class_init() */
  self->vbr_quality = DEFAULT_VBR_QUALITY;
  self->rate_control = DEFAULT_RATE_CONTROL;
  self->bitrate = DEFAULT_BITRATE;
}
/* Class initialiser: sets up the debug category, pad templates and element
 * metadata, hooks up the GObject and GstAudioEncoder virtual methods, and
 * installs the element's properties. */
static void
gst_atenc_class_init (GstATEncClass * klass)
{
  GstAudioEncoderClass *encoder_class = GST_AUDIO_ENCODER_CLASS (klass);
  GstElementClass *gstelement_class = GST_ELEMENT_CLASS (klass);
  GObjectClass *gobject_class = G_OBJECT_CLASS (klass);

  GST_DEBUG_CATEGORY_INIT (gst_atenc_debug, "atenc", 0,
      "AudioToolbox based audio encoder");

  /* Element description and pads */
  gst_element_class_add_static_pad_template (gstelement_class,
      &sink_template);
  gst_element_class_add_static_pad_template (gstelement_class, &src_template);
  gst_element_class_set_static_metadata (gstelement_class,
      "AudioToolbox audio encoder", "Coder/Encoder/Audio/Converter",
      "AudioToolbox based audio encoder for macOS/iOS",
      "Piotr Brzeziński <piotr@centricular.com>");

  /* GstAudioEncoder virtual methods */
  encoder_class->start = GST_DEBUG_FUNCPTR (gst_atenc_start);
  encoder_class->stop = GST_DEBUG_FUNCPTR (gst_atenc_stop);
  encoder_class->flush = GST_DEBUG_FUNCPTR (gst_atenc_flush);
  encoder_class->getcaps = GST_DEBUG_FUNCPTR (gst_atenc_get_caps);
  encoder_class->set_format = GST_DEBUG_FUNCPTR (gst_atenc_set_format);
  encoder_class->handle_frame = GST_DEBUG_FUNCPTR (gst_atenc_handle_frame);

  /* GObject property accessors */
  gobject_class->get_property = GST_DEBUG_FUNCPTR (gst_atenc_get_property);
  gobject_class->set_property = GST_DEBUG_FUNCPTR (gst_atenc_set_property);

  /**
   * GstATEnc:bitrate:
   *
   * Target output bitrate in bps, for CBR and LTA rate control modes.
   *
   * Since: 1.26
   */
  g_object_class_install_property (gobject_class, PROP_BITRATE,
      g_param_spec_uint ("bitrate",
          "Bitrate",
          "target output bitrate in bps (for rate-control=cbr/lta) (0 - auto)",
          0, G_MAXUINT32, DEFAULT_BITRATE,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));

  /**
   * GstATEnc:rate-control:
   *
   * Rate control mode to be applied by the encoder.
   * CBR and LTA modes use the bitrate property, VBR uses the vbr-quality property.
   * Constrained VBR determines the bitrate/quality automatically based on the input signal.
   *
   * Since: 1.26
   */
  g_object_class_install_property (gobject_class, PROP_RATE_CONTROL,
      g_param_spec_enum ("rate-control",
          "Rate control",
          "Mode of output bitrate control to be applied",
          GST_ATENC_RATE_CONTROL,
          DEFAULT_RATE_CONTROL, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));

  /**
   * GstATEnc:vbr-quality:
   *
   * Sound quality setting for VBR encoding.
   *
   * Since: 1.26
   */
  g_object_class_install_property (gobject_class, PROP_VBR_QUALITY,
      g_param_spec_uint ("vbr-quality",
          "VBR quality",
          "Sound quality setting for VBR encoding (rate-control=vbr) (0-127)",
          0, 127, DEFAULT_VBR_QUALITY,
          G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
}