mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2024-12-20 07:16:55 +00:00
0ef396359c
And change lengths and indices from guint to gsize for a more correct type. Also deprecate GstQueueArray and implement it in terms of GstVecDeque. Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/6779>
943 lines
32 KiB
C
943 lines
32 KiB
C
/*
|
|
* Copyright (C) 2024 Piotr Brzeziński <piotr@centricular.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Library General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Library General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Library General Public
|
|
* License along with this library; if not, write to the
|
|
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
|
|
* Boston, MA 02110-1301, USA.
|
|
*/
|
|
/**
|
|
* SECTION:element-atenc
|
|
* @title: atenc
|
|
*
|
|
* AudioToolbox based encoder.
|
|
* ## Example launch line
|
|
* |[
|
|
* gst-launch-1.0 -v audiotestsrc ! atenc ! mp4mux ! filesink location=test.m4a
|
|
* ]|
|
|
* Encodes audio from audiotestsrc and writes it to a file.
|
|
*
|
|
*/
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
#include "config.h"
|
|
#endif
|
|
|
|
#include "gstatenc.h"
|
|
|
|
/* Identifiers of the element's GObject properties. PROP_0 is only a
 * placeholder; the properties installed in class_init start at PROP_BITRATE. */
enum
{
  PROP_0 = 0,
  PROP_BITRATE = 1,             /* "bitrate" */
  PROP_RATE_CONTROL = 2,        /* "rate-control" */
  PROP_VBR_QUALITY = 3          /* "vbr-quality" */
};
|
|
|
|
/* Initial values of the "bitrate", "rate-control" and "vbr-quality"
 * properties. A bitrate of 0 is described as "auto" by the property blurb. */
#define DEFAULT_BITRATE 0
#define DEFAULT_RATE_CONTROL GST_ATENC_RATE_CONTROL_CONSTANT
#define DEFAULT_VBR_QUALITY 65

/* Descriptor tags matched while digging the AudioSpecificConfig
 * out of the encoder's magic cookie. */
#define ES_DESCRIPTOR_TAG 0x03
#define DECODER_CONFIG_DESC_TAG 0x04
#define DECODER_SPECIFIC_INFO_TAG 0x05
|
|
|
|
/* Body of the caps list of sample rates used by both pad templates.
 *
 * Higher sample rates were failing when initializing the encoder.
 * They are probably supported only in specific circumstances; it is hard
 * to find documentation about that. */
#define SAMPLE_RATES \
    " 8000, " "11025, " "12000, " \
    "16000, " "22050, " "24000, " \
    "32000, " "44100, " "48000 "
|
|
|
|
/* *INDENT-OFF* */
|
|
static const GstATEncLayout aac_layouts[] = {
|
|
{
|
|
1, kAudioChannelLayoutTag_Mono, { GST_AUDIO_CHANNEL_POSITION_MONO }}, {
|
|
2, kAudioChannelLayoutTag_Stereo, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT }}, {
|
|
/* C L R */
|
|
3, kAudioChannelLayoutTag_AAC_3_0, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT }}, {
|
|
/* C L R Cs */
|
|
4, kAudioChannelLayoutTag_AAC_4_0, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_REAR_CENTER }}, {
|
|
/* C L R Ls Rs */
|
|
5, kAudioChannelLayoutTag_AAC_5_0, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT }}, {
|
|
/* C L R Ls Rs Lfe */
|
|
6, kAudioChannelLayoutTag_AAC_5_1, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
|
|
/* C L R Ls Rs Cs */
|
|
6, kAudioChannelLayoutTag_AAC_6_0, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_REAR_CENTER }}, {
|
|
/* C L R Ls Rs Cs Lfe */
|
|
7, kAudioChannelLayoutTag_AAC_6_1, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_REAR_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
|
|
/* C L R Ls Rs Rls Rrs */
|
|
7, kAudioChannelLayoutTag_AAC_7_0, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_REAR_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_REAR_RIGHT }}, {
|
|
/* C Lc Rc L R Ls Rs Lfe */
|
|
8, kAudioChannelLayoutTag_AAC_7_1, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT_OF_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT_OF_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
|
|
/* C L R Ls Rs Rls Rrs LFE */
|
|
8, kAudioChannelLayoutTag_AAC_7_1_B, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_REAR_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_REAR_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_LFE1 }}, {
|
|
/* C L R Ls Rs LFE Vhl Vhr */
|
|
8, kAudioChannelLayoutTag_AAC_7_1_C, {
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_CENTER,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_FRONT_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_SURROUND_RIGHT,
|
|
GST_AUDIO_CHANNEL_POSITION_LFE1,
|
|
GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_LEFT,
|
|
GST_AUDIO_CHANNEL_POSITION_TOP_FRONT_RIGHT }}, {
|
|
/* Only used when iterating through all positions */
|
|
0, kAudioChannelLayoutTag_Unknown, { 0 } }
|
|
};
|
|
/* *INDENT-ON* */
|
|
|
|
static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE ("sink",
|
|
GST_PAD_SINK,
|
|
GST_PAD_ALWAYS,
|
|
GST_STATIC_CAPS ("audio/x-raw, "
|
|
"format = (string) " GST_AUDIO_NE (S16) ", "
|
|
"layout = (string) interleaved, "
|
|
"rate = (int) { " SAMPLE_RATES " }, channels = (int) [ 1, 8 ]")
|
|
);
|
|
|
|
static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE ("src",
|
|
GST_PAD_SRC,
|
|
GST_PAD_ALWAYS,
|
|
GST_STATIC_CAPS ("audio/mpeg, "
|
|
"mpegversion = (int) 4, "
|
|
"rate = (int) { " SAMPLE_RATES " }, "
|
|
"channels = (int) [ 1, 8 ], "
|
|
"stream-format = (string) raw, "
|
|
"profile = (string) lc, framed = (boolean) true")
|
|
);
|
|
|
|
GST_DEBUG_CATEGORY_STATIC (gst_atenc_debug);
|
|
#define GST_CAT_DEFAULT gst_atenc_debug
|
|
|
|
G_DEFINE_TYPE (GstATEnc, gst_atenc, GST_TYPE_AUDIO_ENCODER);
|
|
GST_ELEMENT_REGISTER_DEFINE (atenc, "atenc", GST_RANK_PRIMARY, GST_TYPE_ATENC);
|
|
|
|
#define GST_ATENC_RATE_CONTROL (gst_atenc_rate_control_get_type ())
|
|
static GType
|
|
gst_atenc_rate_control_get_type (void)
|
|
{
|
|
static GType atenc_rate_control_type = 0;
|
|
static const GEnumValue types[] = {
|
|
{GST_ATENC_RATE_CONTROL_CONSTANT, "Constant bitrate", "cbr"},
|
|
{GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE, "Long-term-average bitrate",
|
|
"lta"},
|
|
{GST_ATENC_RATE_CONTROL_VARIABLE_CONSTRAINED,
|
|
"Constrained variable bitrate", "cvbr"},
|
|
{GST_ATENC_RATE_CONTROL_VARIABLE, "Variable bitrate", "vbr"},
|
|
{0, NULL, NULL}
|
|
};
|
|
|
|
if (!atenc_rate_control_type)
|
|
atenc_rate_control_type =
|
|
g_enum_register_static ("GstATEncRateControl", types);
|
|
|
|
return atenc_rate_control_type;
|
|
}
|
|
|
|
static void
|
|
gst_atenc_set_property (GObject * object, guint prop_id,
|
|
const GValue * value, GParamSpec * pspec)
|
|
{
|
|
GstATEnc *self = GST_ATENC (object);
|
|
|
|
switch (prop_id) {
|
|
case PROP_BITRATE:
|
|
self->bitrate = g_value_get_uint (value);
|
|
break;
|
|
case PROP_RATE_CONTROL:
|
|
self->rate_control = g_value_get_enum (value);
|
|
break;
|
|
case PROP_VBR_QUALITY:
|
|
self->vbr_quality = g_value_get_uint (value);
|
|
break;
|
|
default:
|
|
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void
|
|
gst_atenc_get_property (GObject * object, guint prop_id,
|
|
GValue * value, GParamSpec * pspec)
|
|
{
|
|
GstATEnc *self = GST_ATENC (object);
|
|
|
|
switch (prop_id) {
|
|
case PROP_BITRATE:
|
|
g_value_set_uint (value, self->bitrate);
|
|
break;
|
|
case PROP_RATE_CONTROL:
|
|
g_value_set_enum (value, self->rate_control);
|
|
break;
|
|
case PROP_VBR_QUALITY:
|
|
g_value_set_uint (value, self->vbr_quality);
|
|
break;
|
|
default:
|
|
G_OBJECT_WARN_INVALID_PROPERTY_ID (object, prop_id, pspec);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static gboolean
|
|
gst_atenc_start (GstAudioEncoder * enc)
|
|
{
|
|
GstATEnc *self = GST_ATENC (enc);
|
|
|
|
GST_DEBUG_OBJECT (self, "Starting encoder");
|
|
|
|
self->input_queue = gst_vec_deque_new (0);
|
|
gst_vec_deque_set_clear_func (self->input_queue,
|
|
(GDestroyNotify) gst_buffer_unref);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static void
|
|
gst_atenc_flush (GstAudioEncoder * enc)
|
|
{
|
|
GstATEnc *self = GST_ATENC (enc);
|
|
|
|
GST_DEBUG_OBJECT (self, "Flushing encoder");
|
|
AudioConverterReset (self->converter);
|
|
|
|
gst_vec_deque_clear (self->input_queue);
|
|
}
|
|
|
|
static gboolean
|
|
gst_atenc_stop (GstAudioEncoder * enc)
|
|
{
|
|
GstATEnc *self = GST_ATENC (enc);
|
|
|
|
GST_DEBUG_OBJECT (self, "Stopping encoder");
|
|
|
|
gst_atenc_flush (enc);
|
|
|
|
if (self->converter) {
|
|
AudioConverterDispose (self->converter);
|
|
self->converter = NULL;
|
|
}
|
|
|
|
gst_vec_deque_free (self->input_queue);
|
|
self->input_queue = NULL;
|
|
|
|
if (self->used_buffer) {
|
|
gst_audio_buffer_unmap (self->used_buffer);
|
|
gst_buffer_unref (self->used_buffer->buffer);
|
|
g_free (self->used_buffer);
|
|
self->used_buffer = NULL;
|
|
}
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static GstCaps *
|
|
gst_atenc_get_caps (GstAudioEncoder * enc, GstCaps * filter)
|
|
{
|
|
GstCaps *layout_caps, *ret, *caps = gst_caps_new_empty ();
|
|
const GstATEncLayout *layout;
|
|
guint64 channel_mask;
|
|
|
|
for (layout = aac_layouts; layout->channels; layout++) {
|
|
layout_caps =
|
|
gst_caps_make_writable (gst_pad_get_pad_template_caps
|
|
(GST_AUDIO_ENCODER_SINK_PAD (enc)));
|
|
|
|
if (layout->channels == 1) {
|
|
gst_caps_set_simple (layout_caps, "channels", G_TYPE_INT,
|
|
layout->channels, NULL);
|
|
} else {
|
|
gst_audio_channel_positions_to_mask (layout->positions, layout->channels,
|
|
FALSE, &channel_mask);
|
|
gst_caps_set_simple (layout_caps, "channels", G_TYPE_INT,
|
|
layout->channels, "channel-mask", GST_TYPE_BITMASK, channel_mask,
|
|
NULL);
|
|
}
|
|
|
|
gst_caps_append (caps, layout_caps);
|
|
}
|
|
|
|
ret = gst_audio_encoder_proxy_getcaps (enc, caps, filter);
|
|
gst_caps_unref (caps);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static OSStatus
|
|
gst_atenc_fill_buffer (AudioConverterRef converter, UInt32 * packets_amount,
|
|
AudioBufferList * buffers, AudioStreamPacketDescription ** desc,
|
|
void *user_data)
|
|
{
|
|
GstATEnc *self = GST_ATENC (user_data);
|
|
GstBuffer *buf;
|
|
GstAudioBuffer *audio_buf;
|
|
GstAudioInfo *audio_info;
|
|
UInt32 wanted_samples = *packets_amount;
|
|
|
|
/* We can now safely clean up the buffer that was previously passed to AT */
|
|
if (self->used_buffer) {
|
|
gst_audio_buffer_unmap (self->used_buffer);
|
|
gst_buffer_unref (self->used_buffer->buffer);
|
|
g_free (self->used_buffer);
|
|
self->used_buffer = NULL;
|
|
}
|
|
|
|
/* See https://developer.apple.com/library/archive/qa/qa1317/_index.html
|
|
* packets_amount indicates how much data is expected to be filled in.
|
|
*
|
|
* The way this is set up, we tell the base class how many samples AT will expect,
|
|
* and it will provide us with that much. Only exception is at the end of stream,
|
|
* where there might not be enough data. Thankfully, if we signal EOS, AT will encode
|
|
* whatever it got as input, without needing to silence-pad to the expected amount.
|
|
*
|
|
* In case of less data than packets_amount => set that to the actual value and return noErr
|
|
* No data currently available, but more is expected => packets_amount=0 and return 1
|
|
* No data available and input got EOS => packets_amount=0 and return noErr
|
|
*/
|
|
buf = gst_vec_deque_pop_head (self->input_queue);
|
|
if (!buf) {
|
|
*packets_amount = 0;
|
|
|
|
if (self->input_eos) {
|
|
GST_DEBUG_OBJECT (self, "No more input data, returning noErr");
|
|
return noErr;
|
|
} else {
|
|
GST_LOG_OBJECT (self, "No input buffer yet, waiting for more data");
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/* We can only unmap the audio_buffer in the next callback, but in the meantime
|
|
* the base class can invalidate the underlying buffer. Ref it manually to ensure
|
|
* it lives long enough. */
|
|
gst_buffer_ref (buf);
|
|
audio_info = gst_audio_encoder_get_audio_info (GST_AUDIO_ENCODER (self));
|
|
audio_buf = g_malloc0 (sizeof (GstAudioBuffer));
|
|
gst_audio_buffer_map (audio_buf, audio_info, buf, GST_MAP_READ);
|
|
|
|
/* Pushing this as a pointer instead of using the _struct() variants
|
|
* because GstAudioBuffer contains self-references, so we'd get dangling pointers otherwise. */
|
|
self->used_buffer = audio_buf;
|
|
|
|
buffers->mNumberBuffers = 1;
|
|
buffers->mBuffers[0].mNumberChannels = GST_AUDIO_INFO_CHANNELS (audio_info);
|
|
buffers->mBuffers[0].mDataByteSize = GST_AUDIO_BUFFER_PLANE_SIZE (audio_buf);
|
|
buffers->mBuffers[0].mData = GST_AUDIO_BUFFER_PLANE_DATA (audio_buf, 0);
|
|
|
|
*packets_amount = audio_buf->n_samples;
|
|
GST_LOG_OBJECT (self, "Wanted %d packets, filled %d", wanted_samples,
|
|
*packets_amount);
|
|
|
|
return noErr;
|
|
}
|
|
|
|
static GstFlowReturn
|
|
gst_atenc_handle_frame (GstAudioEncoder * enc, GstBuffer * buffer)
|
|
{
|
|
GstATEnc *self = GST_ATENC (enc);
|
|
OSStatus status;
|
|
GstBuffer *outbuf;
|
|
GstFlowReturn ret;
|
|
GstMapInfo map_info;
|
|
GstAudioInfo *audio_info;
|
|
AudioBufferList out_bufs = { 0 };
|
|
AudioStreamPacketDescription out_desc = { 0 };
|
|
UInt32 out_packets;
|
|
|
|
if (!buffer) {
|
|
self->input_eos = TRUE;
|
|
GST_DEBUG_OBJECT (self, "No input buffer, draining encoder");
|
|
} else {
|
|
self->input_eos = FALSE;
|
|
gst_vec_deque_push_tail (self->input_queue, buffer);
|
|
GST_LOG ("Pushed buffer to queue");
|
|
}
|
|
|
|
outbuf =
|
|
gst_audio_encoder_allocate_output_buffer (enc,
|
|
self->max_output_buffer_size);
|
|
if (!outbuf) {
|
|
GST_ERROR_OBJECT (self, "Failed to allocate output buffer");
|
|
return GST_FLOW_ERROR;
|
|
}
|
|
|
|
gst_buffer_map (outbuf, &map_info, GST_MAP_WRITE);
|
|
|
|
audio_info = gst_audio_encoder_get_audio_info (enc);
|
|
out_bufs.mNumberBuffers = 1;
|
|
out_bufs.mBuffers[0].mNumberChannels = GST_AUDIO_INFO_CHANNELS (audio_info);
|
|
out_bufs.mBuffers[0].mDataByteSize = self->max_output_buffer_size;
|
|
out_bufs.mBuffers[0].mData = map_info.data;
|
|
out_packets = 1;
|
|
|
|
status =
|
|
AudioConverterFillComplexBuffer (self->converter, gst_atenc_fill_buffer,
|
|
self, &out_packets, &out_bufs, &out_desc);
|
|
|
|
/* gst_atenc_fill_buffer will return 1 when it doesn't have enough data yet */
|
|
if (status != noErr && status != 1) {
|
|
GST_ERROR_OBJECT (self, "Failed to fill buffer: %d", status);
|
|
return GST_FLOW_ERROR;
|
|
}
|
|
|
|
if (out_packets == 0) {
|
|
GST_LOG_OBJECT (self, "No packets produced, more data needed or input EOS");
|
|
gst_buffer_unmap (outbuf, &map_info);
|
|
gst_buffer_unref (outbuf);
|
|
return GST_FLOW_OK;
|
|
}
|
|
|
|
gst_buffer_unmap (outbuf, &map_info);
|
|
|
|
/* On exit, mDataByteSize is set to the number of bytes written. */
|
|
GST_LOG_OBJECT (self, "Output buffer size: %d", out_desc.mDataByteSize);
|
|
g_assert (out_desc.mDataByteSize <= self->max_output_buffer_size);
|
|
gst_buffer_set_size (outbuf, out_desc.mDataByteSize);
|
|
ret = gst_audio_encoder_finish_frame (enc, outbuf, self->n_output_samples);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void
|
|
gst_atenc_fill_input_layout (GstAudioInfo * info, AudioChannelLayout * layout)
|
|
{
|
|
const GstAudioChannelPosition *input_positions =
|
|
&GST_AUDIO_INFO_POSITION (info, 0);
|
|
|
|
layout->mChannelLayoutTag = kAudioChannelLayoutTag_UseChannelDescriptions;
|
|
layout->mNumberChannelDescriptions = GST_AUDIO_INFO_CHANNELS (info);
|
|
for (int i = 0; i < GST_AUDIO_INFO_CHANNELS (info); i++) {
|
|
layout->mChannelDescriptions[i].mChannelLabel =
|
|
gst_audio_channel_position_to_core_audio (input_positions[i], i);
|
|
}
|
|
}
|
|
|
|
static AudioChannelLayoutTag
|
|
gst_atenc_get_output_layout_tag (GstATEnc * self, GstAudioInfo * info)
|
|
{
|
|
const GstAudioChannelPosition *input_positions =
|
|
&GST_AUDIO_INFO_POSITION (info, 0);
|
|
const GstATEncLayout *layout;
|
|
gint input_channels = GST_AUDIO_INFO_CHANNELS (info);
|
|
guint64 input_ch_mask;
|
|
|
|
gst_audio_channel_positions_to_mask (input_positions, input_channels, FALSE,
|
|
&input_ch_mask);
|
|
|
|
/* Try to find a predefined output layout that matches the input channels.
|
|
* Order doesn't matter - we set channel descriptions on input, so AT will reorder internally. */
|
|
for (layout = aac_layouts; layout->channels; layout++) {
|
|
const GstAudioChannelPosition *output_positions = layout->positions;
|
|
guint64 layout_ch_mask;
|
|
|
|
if (layout->channels != input_channels)
|
|
continue;
|
|
|
|
gst_audio_channel_positions_to_mask (output_positions, layout->channels,
|
|
FALSE, &layout_ch_mask);
|
|
if (input_ch_mask != layout_ch_mask)
|
|
continue;
|
|
|
|
return layout->aac_tag;
|
|
}
|
|
|
|
return kAudioChannelLayoutTag_Unknown;
|
|
}
|
|
|
|
static bool
|
|
_parse_descriptor (GstByteReader * br, guint8 * tag, gint * len)
|
|
{
|
|
gint size_of_instance = 0;
|
|
guint8 size_byte;
|
|
gboolean has_next_byte;
|
|
|
|
/* Descriptors are variable size, parse it according
|
|
* to the formula in sec. 14.3.3 of ISO/IEC 14496-1.
|
|
* First 8 bits is the tag. */
|
|
if (!gst_byte_reader_get_uint8 (br, tag))
|
|
return FALSE;
|
|
/* Following is one or more size_byte, in which bit 1 tells us if we should parse further,
|
|
* and the remaining 7 bits are the actual (portion of the) size */
|
|
do {
|
|
if (!gst_byte_reader_get_uint8 (br, &size_byte))
|
|
return FALSE;
|
|
has_next_byte = size_byte & 0x80;
|
|
size_of_instance = (size_of_instance << 7) | (size_byte & 0x7f);
|
|
g_assert (size_of_instance >= 0);
|
|
} while (has_next_byte && gst_byte_reader_get_remaining (br) > 0);
|
|
|
|
if (len)
|
|
*len = size_of_instance;
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
static void
|
|
gst_atenc_extract_audio_specific_config (guint8 * cookie_buf, guint cookie_size,
|
|
guint8 ** asc, guint * asc_size)
|
|
{
|
|
GstByteReader *br = gst_byte_reader_new (cookie_buf, cookie_size);
|
|
gint len;
|
|
guint8 tag, flags, flag_skip;
|
|
|
|
/* Cookie data is a MPEG descriptor structure, we need to extract the AudioSpecificConfig.
|
|
* Structures parsed below are described in ISO/IEC 14496-1 */
|
|
while (gst_byte_reader_get_remaining (br) > 0) {
|
|
if (!_parse_descriptor (br, &tag, NULL))
|
|
break;
|
|
if (tag == ES_DESCRIPTOR_TAG) {
|
|
/* First, find the ES_Descriptor and parse flags that tell us how many bits to skip */
|
|
if (!gst_byte_reader_skip (br, 2))
|
|
break;
|
|
if (!gst_byte_reader_get_uint8 (br, &flags))
|
|
break;
|
|
if (flags & 0x80)
|
|
if (!gst_byte_reader_skip (br, 2))
|
|
break;
|
|
if (flags & 0x40) {
|
|
if (!gst_byte_reader_get_uint8 (br, &flag_skip))
|
|
break;
|
|
if (!gst_byte_reader_skip (br, flag_skip))
|
|
break;
|
|
}
|
|
if (flags & 0x20)
|
|
if (!gst_byte_reader_skip (br, 2))
|
|
break;
|
|
} else if (tag == DECODER_CONFIG_DESC_TAG) {
|
|
/* Then we get the DecoderConfigDescriptor and skip its first 13 bytes to get to DecoderSpecificInfo */
|
|
if (!gst_byte_reader_skip (br, 13))
|
|
break;
|
|
if (!_parse_descriptor (br, &tag, &len))
|
|
break;
|
|
/* DecoderSpecificInfo is the AudioSpecificConfig in our case */
|
|
if (tag == DECODER_SPECIFIC_INFO_TAG) {
|
|
*asc_size = len;
|
|
*asc = g_malloc0 (*asc_size);
|
|
if (!gst_byte_reader_dup_data (br, *asc_size, asc)) {
|
|
g_free (*asc);
|
|
*asc = NULL;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static gboolean
|
|
gst_atenc_set_format (GstAudioEncoder * enc, GstAudioInfo * info)
|
|
{
|
|
GstATEnc *self = GST_ATENC (enc);
|
|
AudioStreamBasicDescription input_desc = { 0 };
|
|
AudioStreamBasicDescription output_desc = { 0 };
|
|
AudioChannelLayout *layout = NULL;
|
|
AudioChannelLayoutTag output_layout_tag;
|
|
GstCaps *src_caps;
|
|
OSStatus status;
|
|
gboolean ret;
|
|
UInt32 prop_size, max_output_size;
|
|
guint8 *cookie_data = NULL;
|
|
guint8 *audio_config = NULL;
|
|
guint32 audio_config_size = 0;
|
|
GstBuffer *asc_buf;
|
|
|
|
if (self->converter) {
|
|
/* Drain any leftover data from encoder */
|
|
gst_atenc_handle_frame (enc, NULL);
|
|
AudioConverterDispose (self->converter);
|
|
self->converter = NULL;
|
|
}
|
|
|
|
input_desc.mSampleRate = GST_AUDIO_INFO_RATE (info);
|
|
input_desc.mFormatID = kAudioFormatLinearPCM;
|
|
input_desc.mFormatFlags =
|
|
kAudioFormatFlagIsSignedInteger | kAudioFormatFlagIsPacked;
|
|
input_desc.mFramesPerPacket = 1;
|
|
input_desc.mBytesPerFrame = input_desc.mBytesPerPacket =
|
|
GST_AUDIO_INFO_BPF (info);
|
|
input_desc.mChannelsPerFrame = GST_AUDIO_INFO_CHANNELS (info);
|
|
input_desc.mBitsPerChannel = GST_AUDIO_INFO_DEPTH (info);
|
|
|
|
/* HE-AAC v1/v2 and LD to be added later.
|
|
* For LD, AudioSpecificConfig parsing fails completely, might be due to faulty MPEG descriptor parsing.
|
|
* For HE-AAC, channel configurations need testing (also sometimes fail to parse). */
|
|
output_desc.mFormatID = kAudioFormatMPEG4AAC;
|
|
output_desc.mSampleRate = GST_AUDIO_INFO_RATE (info);
|
|
output_desc.mChannelsPerFrame = GST_AUDIO_INFO_CHANNELS (info);
|
|
|
|
status = AudioConverterNew (&input_desc, &output_desc, &self->converter);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to create audio converter: %d", status);
|
|
return FALSE;
|
|
}
|
|
|
|
/* Using the encoder-provided size results in kAudioCodecBadPropertySizeError, so let's calculate it manually... */
|
|
prop_size =
|
|
sizeof (AudioChannelLayout) +
|
|
sizeof (AudioChannelDescription) * GST_AUDIO_INFO_CHANNELS (info);
|
|
layout = g_malloc0 (prop_size);
|
|
|
|
/* For input, AT expects per-channel descriptions to be used */
|
|
gst_atenc_fill_input_layout (info, layout);
|
|
status =
|
|
AudioConverterSetProperty (self->converter,
|
|
kAudioConverterInputChannelLayout, prop_size, layout);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to set input channel layout: %d", status);
|
|
g_free (layout);
|
|
return FALSE;
|
|
}
|
|
|
|
/* For output, instead of channel descriptions, we use an AAC tag indicating one of the predefined layouts */
|
|
output_layout_tag = gst_atenc_get_output_layout_tag (self, info);
|
|
if (output_layout_tag == kAudioChannelLayoutTag_Unknown) {
|
|
GST_ERROR_OBJECT (self,
|
|
"Failed to find a matching output channel layout tag");
|
|
g_free (layout);
|
|
return FALSE;
|
|
}
|
|
|
|
layout->mChannelLayoutTag = output_layout_tag;
|
|
layout->mNumberChannelDescriptions = 0;
|
|
|
|
status =
|
|
AudioConverterSetProperty (self->converter,
|
|
kAudioConverterOutputChannelLayout, prop_size, layout);
|
|
g_free (layout);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to set output channel layout: %d", status);
|
|
return FALSE;
|
|
}
|
|
|
|
/* TODO: Check if this works on iOS */
|
|
status =
|
|
AudioConverterSetProperty (self->converter,
|
|
kAudioCodecPropertyBitRateControlMode, sizeof (UInt32),
|
|
&self->rate_control);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to set bitrate control mode: %d", status);
|
|
return FALSE;
|
|
}
|
|
|
|
if (self->rate_control == GST_ATENC_RATE_CONTROL_VARIABLE) {
|
|
status =
|
|
AudioConverterSetProperty (self->converter,
|
|
kAudioCodecPropertySoundQualityForVBR, sizeof (UInt32),
|
|
&self->vbr_quality);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to set VBR quality: %d", status);
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
if (self->bitrate > 0
|
|
&& (self->rate_control == GST_ATENC_RATE_CONTROL_CONSTANT
|
|
|| self->rate_control == GST_ATENC_RATE_CONTROL_LONG_TERM_AVERAGE)) {
|
|
/* Query the encoder for possible bitrate values and adjust if needed */
|
|
AudioValueRange *bitrate_ranges;
|
|
UInt32 actual_bitrate;
|
|
|
|
status =
|
|
AudioConverterGetPropertyInfo (self->converter,
|
|
kAudioConverterApplicableEncodeBitRates, &prop_size, NULL);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to get possible bitrates size: %d",
|
|
status);
|
|
return FALSE;
|
|
}
|
|
|
|
bitrate_ranges = g_malloc (prop_size);
|
|
status =
|
|
AudioConverterGetProperty (self->converter,
|
|
kAudioConverterApplicableEncodeBitRates, &prop_size, bitrate_ranges);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to get possible bitrates: %d", status);
|
|
g_free (bitrate_ranges);
|
|
return FALSE;
|
|
}
|
|
|
|
GST_LOG_OBJECT (self, "Allowed bitrate ranges:");
|
|
for (int i = 0; i < prop_size / sizeof (AudioValueRange); i++) {
|
|
AudioValueRange *range = &bitrate_ranges[i];
|
|
GST_LOG_OBJECT (self, "%d: %f - %f",
|
|
i + 1, range->mMinimum, range->mMaximum);
|
|
}
|
|
|
|
/* Returned ranges are ordered from lowest to highest values */
|
|
for (int i = 0; i < prop_size / sizeof (AudioValueRange); i++) {
|
|
AudioValueRange *range = &bitrate_ranges[i];
|
|
if (self->bitrate == range->mMinimum && self->bitrate == range->mMaximum) {
|
|
/* Often the min/max values are identical, so not that much of a range... */
|
|
actual_bitrate = self->bitrate;
|
|
break;
|
|
} else if (self->bitrate < range->mMinimum) {
|
|
actual_bitrate = range->mMinimum;
|
|
break;
|
|
} else if (self->bitrate > range->mMaximum) {
|
|
/* We might find higher values still, so no break */
|
|
actual_bitrate = range->mMaximum;
|
|
}
|
|
}
|
|
|
|
if (actual_bitrate != self->bitrate) {
|
|
GST_WARNING_OBJECT (self,
|
|
"Requested bitrate %d not in the allowed range, using %d",
|
|
self->bitrate, actual_bitrate);
|
|
self->bitrate = actual_bitrate;
|
|
}
|
|
|
|
/* TODO: This could be changed at any time instead of just in set_format,
|
|
* but from initial testing, changing the bitrate when encoding introduces
|
|
* a very short pause in encoded sound. Needs investigation. */
|
|
status =
|
|
AudioConverterSetProperty (self->converter,
|
|
kAudioConverterEncodeBitRate, sizeof (UInt32), &actual_bitrate);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to set bitrate: %d", status);
|
|
g_free (bitrate_ranges);
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
/* After creation, encoder fills input/output desc with more details */
|
|
prop_size = sizeof (output_desc);
|
|
status =
|
|
AudioConverterGetProperty (self->converter,
|
|
kAudioConverterCurrentOutputStreamDescription, &prop_size, &output_desc);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to get output format: %d", status);
|
|
return FALSE;
|
|
}
|
|
self->n_output_samples = output_desc.mFramesPerPacket;
|
|
GST_DEBUG_OBJECT (self, "samples per output packet: %d",
|
|
self->n_output_samples);
|
|
|
|
/* This isn't always set, so we might need to query manually */
|
|
max_output_size = output_desc.mBytesPerPacket;
|
|
if (max_output_size == 0) {
|
|
prop_size = sizeof (max_output_size);
|
|
status =
|
|
AudioConverterGetProperty (self->converter,
|
|
kAudioConverterPropertyMaximumOutputPacketSize, &prop_size,
|
|
&max_output_size);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to get maximum output packet size: %d",
|
|
status);
|
|
return FALSE;
|
|
}
|
|
}
|
|
self->max_output_buffer_size = max_output_size;
|
|
GST_DEBUG_OBJECT (self, "maximum output buffer size: %d",
|
|
self->max_output_buffer_size);
|
|
|
|
/* For AAC, AT usually asks for 1024 samples per packet, base class needs to know */
|
|
gst_audio_encoder_set_frame_max (enc, 1);
|
|
gst_audio_encoder_set_frame_samples_min (enc, self->n_output_samples);
|
|
gst_audio_encoder_set_frame_samples_max (enc, self->n_output_samples);
|
|
gst_audio_encoder_set_drainable (enc, TRUE);
|
|
|
|
/* FIXME: Handle lookahead according to kAudioConverterPrimeInfo.leadingFrames.
|
|
* When passed directly to gst_audio_encoder_set_lookahead, causes
|
|
* an audible skip in audio, and muxers such as mp4mux error out.
|
|
* To be investigated. */
|
|
|
|
status =
|
|
AudioConverterGetPropertyInfo (self->converter,
|
|
kAudioConverterCompressionMagicCookie, &prop_size, NULL);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to get magic cookie size: %d", status);
|
|
return FALSE;
|
|
}
|
|
|
|
cookie_data = g_malloc (prop_size);
|
|
status =
|
|
AudioConverterGetProperty (self->converter,
|
|
kAudioConverterCompressionMagicCookie, &prop_size, cookie_data);
|
|
if (status != noErr) {
|
|
GST_ERROR_OBJECT (self, "Failed to get magic cookie: %d", status);
|
|
g_free (cookie_data);
|
|
return FALSE;
|
|
}
|
|
|
|
/* Cookie contains a bunch of descriptors, gotta dig a bit to get the AudioSpecificConfig */
|
|
gst_atenc_extract_audio_specific_config (cookie_data, prop_size,
|
|
&audio_config, &audio_config_size);
|
|
if (!audio_config) {
|
|
GST_ERROR_OBJECT (self, "Failed to extract AudioSpecificConfig");
|
|
g_free (cookie_data);
|
|
return FALSE;
|
|
}
|
|
|
|
asc_buf = gst_buffer_new_wrapped (audio_config, audio_config_size);
|
|
|
|
src_caps = gst_caps_new_simple ("audio/mpeg",
|
|
"mpegversion", G_TYPE_INT, 4,
|
|
"rate", G_TYPE_INT, GST_AUDIO_INFO_RATE (info),
|
|
"channels", G_TYPE_INT, GST_AUDIO_INFO_CHANNELS (info),
|
|
"stream-format", G_TYPE_STRING, "raw",
|
|
"framed", G_TYPE_BOOLEAN, TRUE,
|
|
"codec_data", GST_TYPE_BUFFER, asc_buf, NULL);
|
|
|
|
gst_codec_utils_aac_caps_set_level_and_profile (src_caps, audio_config,
|
|
audio_config_size);
|
|
gst_buffer_unref (asc_buf);
|
|
g_free (cookie_data);
|
|
|
|
ret = gst_audio_encoder_set_output_format (enc, src_caps);
|
|
GST_DEBUG ("output caps: %" GST_PTR_FORMAT, src_caps);
|
|
gst_caps_unref (src_caps);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void
|
|
gst_atenc_init (GstATEnc * self)
|
|
{
|
|
self->bitrate = DEFAULT_BITRATE;
|
|
self->rate_control = DEFAULT_RATE_CONTROL;
|
|
self->vbr_quality = DEFAULT_VBR_QUALITY;
|
|
self->input_eos = FALSE;
|
|
self->used_buffer = NULL;
|
|
}
|
|
|
|
static void
|
|
gst_atenc_class_init (GstATEncClass * klass)
|
|
{
|
|
GObjectClass *object_class = G_OBJECT_CLASS (klass);
|
|
GstElementClass *element_class = GST_ELEMENT_CLASS (klass);
|
|
GstAudioEncoderClass *base_class = GST_AUDIO_ENCODER_CLASS (klass);
|
|
|
|
object_class->set_property = GST_DEBUG_FUNCPTR (gst_atenc_set_property);
|
|
object_class->get_property = GST_DEBUG_FUNCPTR (gst_atenc_get_property);
|
|
|
|
base_class->start = GST_DEBUG_FUNCPTR (gst_atenc_start);
|
|
base_class->stop = GST_DEBUG_FUNCPTR (gst_atenc_stop);
|
|
base_class->getcaps = GST_DEBUG_FUNCPTR (gst_atenc_get_caps);
|
|
base_class->set_format = GST_DEBUG_FUNCPTR (gst_atenc_set_format);
|
|
base_class->handle_frame = GST_DEBUG_FUNCPTR (gst_atenc_handle_frame);
|
|
base_class->flush = GST_DEBUG_FUNCPTR (gst_atenc_flush);
|
|
|
|
/**
|
|
* GstATEnc:bitrate:
|
|
*
|
|
* Target output bitrate in bps, for CBR and LTA rate control modes.
|
|
*
|
|
* Since: 1.26
|
|
*/
|
|
g_object_class_install_property (object_class, PROP_BITRATE,
|
|
g_param_spec_uint ("bitrate",
|
|
"Bitrate",
|
|
"target output bitrate in bps (for rate-control=cbr/lta) (0 - auto)",
|
|
0, G_MAXUINT32, DEFAULT_BITRATE,
|
|
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
|
|
|
|
/**
|
|
* GstATEnc:rate-control:
|
|
*
|
|
* Rate control mode to be applied by the encoder.
|
|
* CBR and LTA modes use the bitrate property, VBR uses the vbr-quality property.
|
|
* Constrained VBR determines the bitrate/quality automatically based on the input signal.
|
|
*
|
|
* Since: 1.26
|
|
*/
|
|
g_object_class_install_property (object_class, PROP_RATE_CONTROL,
|
|
g_param_spec_enum ("rate-control",
|
|
"Rate control",
|
|
"Mode of output bitrate control to be applied",
|
|
GST_ATENC_RATE_CONTROL,
|
|
DEFAULT_RATE_CONTROL, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
|
|
|
|
/**
|
|
* GstATEnc:vbr-quality:
|
|
*
|
|
* Sound quality setting for VBR encoding.
|
|
*
|
|
* Since: 1.26
|
|
*/
|
|
g_object_class_install_property (object_class, PROP_VBR_QUALITY,
|
|
g_param_spec_uint ("vbr-quality",
|
|
"VBR quality",
|
|
"Sound quality setting for VBR encoding (rate-control=vbr) (0-127)",
|
|
0, 127, DEFAULT_VBR_QUALITY,
|
|
G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
|
|
|
|
gst_element_class_add_static_pad_template (element_class, &sink_template);
|
|
gst_element_class_add_static_pad_template (element_class, &src_template);
|
|
|
|
gst_element_class_set_static_metadata (element_class,
|
|
"AudioToolbox audio encoder", "Coder/Encoder/Audio/Converter",
|
|
"AudioToolbox based audio encoder for macOS/iOS",
|
|
"Piotr Brzeziński <piotr@centricular.com>");
|
|
|
|
GST_DEBUG_CATEGORY_INIT (gst_atenc_debug, "atenc", 0,
|
|
"AudioToolbox based audio encoder");
|
|
}
|