isomp4: qtmux: Add Closed Caption support

Supports CEA 608 and CEA 708 CC streams

Also supports usage in "Robust Prefill" mode if the incoming caption
stream is constant (i.e. there is one incoming CC buffer for each
video frame).

https://bugzilla.gnome.org/show_bug.cgi?id=606643
This commit is contained in:
Edward Hervey 2018-02-07 11:00:18 +01:00 committed by Edward Hervey
parent 76e32ef414
commit 7378f1b4fd
6 changed files with 366 additions and 35 deletions

View file

@ -692,6 +692,7 @@ atom_stsd_remove_entries (AtomSTSD * stsd)
case TIMECODE:
sample_entry_tmcd_free ((SampleTableEntryTMCD *) se);
break;
case CLOSEDCAPTION:
default:
/* best possible cleanup */
atom_sample_entry_free (se);
@ -2229,6 +2230,20 @@ sample_entry_tmcd_copy_data (SampleTableEntryTMCD * tmcd, guint8 ** buffer,
return *offset - original_offset;
}
/* Serializes a sample entry that has no type-specific payload: the entry is
 * written with atom_sample_entry_copy_data() and the atom size is then
 * finalised with atom_write_size(), using the offset at which writing began.
 *
 * Returns the number of bytes the offset advanced by, or 0 when
 * atom_sample_entry_copy_data() reports failure. */
static guint64
sample_entry_generic_copy_data (SampleTableEntry * entry, guint8 ** buffer,
    guint64 * size, guint64 * offset)
{
  const guint64 start = *offset;
  guint64 written = 0;

  if (atom_sample_entry_copy_data (entry, buffer, size, offset)) {
    atom_write_size (buffer, size, offset, start);
    written = *offset - start;
  }

  return written;
}
guint64
atom_stsz_copy_data (AtomSTSZ * stsz, guint8 ** buffer, guint64 * size,
guint64 * offset)
@ -2462,6 +2477,11 @@ atom_stsd_copy_data (AtomSTSD * stsd, guint8 ** buffer, guint64 * size,
walker->data, buffer, size, offset)) {
return 0;
}
} else if (se->kind == CLOSEDCAPTION) {
if (!sample_entry_generic_copy_data ((SampleTableEntry *)
walker->data, buffer, size, offset)) {
return 0;
}
} else {
if (!atom_hint_sample_entry_copy_data (
(AtomHintSampleEntry *) walker->data, buffer, size, offset)) {
@ -4082,6 +4102,44 @@ atom_trak_set_timecode_type (AtomTRAK * trak, AtomsContext * context,
return ste;
}
/* Configures @trak as a closed caption track and returns the newly created
 * sample description entry (owned by the trak's stsd entry list).
 *
 * Only the MOV flavor is handled: for any other @context flavor nothing is
 * modified and NULL is returned.
 *
 * @trak_timescale is stored as the media header timescale and @caption_type
 * is passed to atom_sample_entry_init() for the new entry. */
SampleTableEntry *
atom_trak_set_caption_type (AtomTRAK * trak, AtomsContext * context,
    guint32 trak_timescale, guint32 caption_type)
{
  AtomSTSD *sample_descriptions;
  AtomGMHD *base_header;
  SampleTableEntry *entry;

  if (context->flavor != ATOMS_TREE_FLAVOR_MOV)
    return NULL;

  /* Media header and handler description */
  trak->mdia.mdhd.time_info.timescale = trak_timescale;
  trak->mdia.hdlr.component_type = FOURCC_mhlr;
  trak->mdia.hdlr.handler_type = FOURCC_clcp;
  g_free (trak->mdia.hdlr.name);
  trak->mdia.hdlr.name = g_strdup ("Closed Caption Media Handler");

  /* Sample description: a bare entry tagged as closed caption */
  entry = g_new0 (SampleTableEntry, 1);
  atom_sample_entry_init (entry, caption_type);
  entry->kind = CLOSEDCAPTION;
  entry->data_reference_index = 1;

  sample_descriptions = &trak->mdia.minf.stbl.stsd;
  sample_descriptions->entries =
      g_list_prepend (sample_descriptions->entries, entry);
  sample_descriptions->n_entries++;

  /* Generic media header.
   * NOTE(review): whatever trak->mdia.minf.gmhd pointed to before is
   * overwritten without being released here - confirm callers never reach
   * this with a gmhd already set. */
  base_header = atom_gmhd_new ();
  base_header->gmin.graphics_mode = 0x0040;
  base_header->gmin.opcolor[0] = 0x8000;
  base_header->gmin.opcolor[1] = 0x8000;
  base_header->gmin.opcolor[2] = 0x8000;
  trak->mdia.minf.gmhd = base_header;

  trak->is_video = FALSE;
  trak->is_h264 = FALSE;

  return entry;
}
static AtomInfo *
build_pasp_extension (gint par_width, gint par_height)
{

View file

@ -386,7 +386,8 @@ typedef enum _SampleEntryKind
AUDIO,
VIDEO,
SUBTITLE,
TIMECODE
TIMECODE,
CLOSEDCAPTION
} SampleEntryKind;
typedef struct _SampleTableEntry
@ -999,7 +1000,6 @@ guint64 atom_mfra_copy_data (AtomMFRA *mfra, guint8 **buffer, guint64
/* media sample description related helpers */
typedef struct
{
guint16 version;
@ -1056,6 +1056,9 @@ SampleTableEntryTX3G * atom_trak_set_subtitle_type (AtomTRAK * trak, AtomsContex
SampleTableEntryTMCD *
atom_trak_set_timecode_type (AtomTRAK * trak, AtomsContext * context, guint trak_timescale, GstVideoTimeCode * tc);
SampleTableEntry * atom_trak_set_caption_type (AtomTRAK *trak, AtomsContext *context,
guint32 trak_timescale, guint32 caption_type);
void atom_trak_update_bitrates (AtomTRAK * trak, guint32 avg_bitrate,
guint32 max_bitrate);

View file

@ -460,7 +460,8 @@ gst_qt_mux_base_init (gpointer g_class)
GstElementClass *element_class = GST_ELEMENT_CLASS (g_class);
GstQTMuxClass *klass = (GstQTMuxClass *) g_class;
GstQTMuxClassParams *params;
GstPadTemplate *videosinktempl, *audiosinktempl, *subtitlesinktempl;
GstPadTemplate *videosinktempl, *audiosinktempl, *subtitlesinktempl,
*captionsinktempl;
GstPadTemplate *srctempl;
gchar *longname, *description;
@ -505,6 +506,13 @@ gst_qt_mux_base_init (gpointer g_class)
gst_element_class_add_pad_template (element_class, subtitlesinktempl);
}
if (params->caption_sink_caps) {
captionsinktempl = gst_pad_template_new_with_gtype ("caption_%u",
GST_PAD_SINK, GST_PAD_REQUEST, params->caption_sink_caps,
GST_TYPE_QT_MUX_PAD);
gst_element_class_add_pad_template (element_class, captionsinktempl);
}
klass->format = params->prop->format;
}
@ -865,6 +873,157 @@ gst_qt_mux_prepare_jpc_buffer (GstQTPad * qtpad, GstBuffer * buf,
return newbuf;
}
/* Collects the CEA-608 byte pairs belonging to one field out of a buffer of
 * 3-byte cc_data triplets.
 *
 * @ccdata:      triplets laid out as { header, byte1, byte2 }
 * @ccdata_size: size of @ccdata in bytes; a trailing partial triplet is
 *               ignored
 * @field:       1 selects triplets whose header is 0xfc, 2 selects those
 *               whose header is 0xfd; any other value matches nothing
 * @res:         (out) newly allocated array holding the matching byte pairs,
 *               to be released with g_free(), or NULL when nothing matched
 *
 * Returns the number of bytes stored in *@res (always even), 0 if none. */
static gsize
extract_608_field_from_cc_data (const guint8 * ccdata, gsize ccdata_size,
    guint field, guint8 ** res)
{
  guint8 *storage;
  gsize storage_size = 128;
  gsize i, res_size = 0;

  storage = g_malloc0 (storage_size);

  /* Iterate over the ccdata and put the corresponding tuples for the given
   * field in the storage.
   *
   * 'i' is a byte offset that advances one triplet (3 bytes) at a time, so
   * it must be used directly as the index. The loop condition guarantees a
   * complete triplet is available before any of its bytes are read. */
  for (i = 0; ccdata_size - i >= 3; i += 3) {
    const guint8 *triplet = ccdata + i;

    if ((field == 1 && triplet[0] == 0xfc) ||
        (field == 2 && triplet[0] == 0xfd)) {
      GST_DEBUG ("Storing matching cc for field %d : 0x%02x 0x%02x", field,
          triplet[1], triplet[2]);
      /* Make sure there is room for both bytes of the pair */
      if (res_size + 2 > storage_size) {
        storage_size += 128;
        storage = g_realloc (storage, storage_size);
      }
      storage[res_size] = triplet[1];
      storage[res_size + 1] = triplet[2];
      res_size += 2;
    }
  }

  if (res_size == 0) {
    g_free (storage);
    *res = NULL;
    return 0;
  }

  *res = storage;
  return res_size;
}
/* prepare_buf_func for closed caption pads: wraps the incoming caption data
 * in the atoms stored in the caption track samples.
 *
 *  - FOURCC_c608: the input is scanned for field 1 and field 2 byte pairs,
 *    which are written as a 'cdat' and a 'cdt2' atom respectively. In prefill
 *    mode the output is always 20 bytes (both atoms, 2 payload bytes each),
 *    with 0x80 0x80 standing in for a field that has no data.
 *  - FOURCC_c708: the whole input is written as a single 'ccdp' atom. In
 *    prefill mode the output buffer is always 100 bytes, so inputs larger
 *    than 92 bytes are rejected.
 *
 * Takes ownership of @buf (it is always unreffed, except when it is NULL).
 * Returns a new buffer carrying @buf's metadata, or NULL when @buf is NULL,
 * cannot be mapped, carries no usable data, or has an unknown format. */
static GstBuffer *
gst_qt_mux_prepare_caption_buffer (GstQTPad * qtpad, GstBuffer * buf,
    GstQTMux * qtmux)
{
  GstBuffer *newbuf = NULL;
  GstMapInfo map, inmap;
  gsize size;
  gboolean in_prefill;

  if (buf == NULL)
    return NULL;

  in_prefill = (qtmux->mux_mode == GST_QT_MUX_MODE_ROBUST_RECORDING_PREFILL);

  size = gst_buffer_get_size (buf);
  if (!gst_buffer_map (buf, &inmap, GST_MAP_READ)) {
    GST_ERROR_OBJECT (qtmux, "Failed to map caption buffer");
    gst_buffer_unref (buf);
    return NULL;
  }

  GST_LOG_OBJECT (qtmux,
      "Preparing caption buffer %" GST_FOURCC_FORMAT " size:%" G_GSIZE_FORMAT,
      GST_FOURCC_ARGS (qtpad->fourcc), size);

  switch (qtpad->fourcc) {
    case FOURCC_c608:
    {
      guint8 *cdat = NULL, *cdt2 = NULL;
      gsize cdat_size, cdt2_size, total_size = 0;
      gsize write_offs = 0;

      /* Read from the mapped *input* buffer; 'map' is only valid once the
       * output buffer has been mapped further down */
      cdat_size =
          extract_608_field_from_cc_data (inmap.data, inmap.size, 1, &cdat);
      cdt2_size =
          extract_608_field_from_cc_data (inmap.data, inmap.size, 2, &cdt2);

      if (cdat_size)
        total_size += cdat_size + 8;
      if (cdt2_size)
        total_size += cdt2_size + 8;
      if (total_size == 0) {
        GST_DEBUG_OBJECT (qtmux, "No 608 data ?");
        /* FIXME : We might want to *always* store something, even if
         * it's "empty" CC (i.e. 0x80 0x80) */
        break;
      }

      newbuf = gst_buffer_new_and_alloc (in_prefill ? 20 : total_size);
      /* Let's copy over all metadata and not the memory */
      gst_buffer_copy_into (newbuf, buf, GST_BUFFER_COPY_METADATA, 0, size);

      gst_buffer_map (newbuf, &map, GST_MAP_WRITE);
      if (cdat_size || in_prefill) {
        GST_WRITE_UINT32_BE (map.data, in_prefill ? 10 : cdat_size + 8);
        GST_WRITE_UINT32_LE (map.data + 4, FOURCC_cdat);
        if (cdat_size)
          memcpy (map.data + 8, cdat, in_prefill ? 2 : cdat_size);
        else {
          /* Write 'empty' CC */
          map.data[8] = 0x80;
          map.data[9] = 0x80;
        }
        write_offs = in_prefill ? 10 : cdat_size + 8;
      }

      if (cdt2_size || in_prefill) {
        GST_WRITE_UINT32_BE (map.data + write_offs,
            in_prefill ? 10 : cdt2_size + 8);
        GST_WRITE_UINT32_LE (map.data + write_offs + 4, FOURCC_cdt2);
        if (cdt2_size)
          memcpy (map.data + write_offs + 8, cdt2,
              in_prefill ? 2 : cdt2_size);
        else {
          /* Write 'empty' CC */
          map.data[write_offs + 8] = 0x80;
          map.data[write_offs + 9] = 0x80;
        }
      }
      gst_buffer_unmap (newbuf, &map);

      /* Both pointers are NULL when their field had no data */
      g_free (cdat);
      g_free (cdt2);
      break;
    }
    case FOURCC_c708:
    {
      /* Take the whole CDP */
      if (in_prefill && size > 92) {
        GST_ERROR_OBJECT (qtmux, "Input C708 CDP too big for prefill mode !");
        break;
      }
      newbuf = gst_buffer_new_and_alloc (in_prefill ? 100 : size + 8);

      /* Let's copy over all metadata and not the memory */
      gst_buffer_copy_into (newbuf, buf, GST_BUFFER_COPY_METADATA, 0, size);

      gst_buffer_map (newbuf, &map, GST_MAP_WRITE);
      GST_WRITE_UINT32_BE (map.data, size + 8);
      GST_WRITE_UINT32_LE (map.data + 4, FOURCC_ccdp);
      memcpy (map.data + 8, inmap.data, inmap.size);
      /* In prefill mode the buffer is larger than the atom: zero the padding
       * so no uninitialised memory ends up in the output */
      if (map.size > inmap.size + 8)
        memset (map.data + 8 + inmap.size, 0, map.size - (inmap.size + 8));
      gst_buffer_unmap (newbuf, &map);
      break;
    }
    default:
      /* theoretically this should never happen, but let's keep this here in case */
      GST_WARNING_OBJECT (qtmux, "Unknown caption format");
      break;
  }

  gst_buffer_unmap (buf, &inmap);
  gst_buffer_unref (buf);

  return newbuf;
}
static GstBuffer *
gst_qt_mux_prepare_tx3g_buffer (GstQTPad * qtpad, GstBuffer * buf,
GstQTMux * qtmux)
@ -2265,6 +2424,8 @@ prefill_get_block_index (GstQTMux * qtmux, GstQTPad * qpad)
case FOURCC_apco:
case FOURCC_ap4h:
case FOURCC_ap4x:
case FOURCC_c608:
case FOURCC_c708:
return qpad->sample_offset;
case FOURCC_sowt:
case FOURCC_twos:
@ -2333,6 +2494,12 @@ prefill_get_sample_size (GstQTMux * qtmux, GstQTPad * qpad)
return 900000;
}
break;
case FOURCC_c608:
/* We always write both cdat and cdt2 atom in prefill mode */
return 20;
case FOURCC_c708:
/* We're cheating a bit by always allocating 100bytes even if we use less */
return 100;
case FOURCC_sowt:
case FOURCC_twos:{
guint64 block_idx;
@ -2367,6 +2534,8 @@ prefill_get_next_timestamp (GstQTMux * qtmux, GstQTPad * qpad)
case FOURCC_apco:
case FOURCC_ap4h:
case FOURCC_ap4x:
case FOURCC_c608:
case FOURCC_c708:
return gst_util_uint64_scale (qpad->sample_offset + 1,
qpad->expected_sample_duration_d * GST_SECOND,
qpad->expected_sample_duration_n);
@ -2463,22 +2632,9 @@ prefill_raw_audio_prepare_buf_func (GstQTPad * qtpad, GstBuffer * buf,
return buf;
}
static gboolean
prefill_update_sample_size (GstQTMux * qtmux, GstQTPad * qpad)
static void
find_video_sample_duration (GstQTMux * qtmux, guint * dur_n, guint * dur_d)
{
switch (qpad->fourcc) {
case FOURCC_apch:
case FOURCC_apcn:
case FOURCC_apcs:
case FOURCC_apco:
case FOURCC_ap4h:
case FOURCC_ap4x:{
guint sample_size = prefill_get_sample_size (qtmux, qpad);
atom_trak_set_constant_size_samples (qpad->trak, sample_size);
return TRUE;
}
case FOURCC_sowt:
case FOURCC_twos:{
GSList *walk;
/* Find the (first) video track and assume that we have to output
@ -2488,21 +2644,50 @@ prefill_update_sample_size (GstQTMux * qtmux, GstQTPad * qpad)
GstQTPad *tmp_qpad = (GstQTPad *) cdata;
if (tmp_qpad->trak->is_video) {
qpad->expected_sample_duration_n =
tmp_qpad->expected_sample_duration_n;
qpad->expected_sample_duration_d =
tmp_qpad->expected_sample_duration_d;
*dur_n = tmp_qpad->expected_sample_duration_n;
*dur_d = tmp_qpad->expected_sample_duration_d;
break;
}
}
if (walk == NULL) {
GST_INFO_OBJECT (qpad->collect.pad,
GST_INFO_OBJECT (qtmux,
"Found no video framerate, using 40ms audio buffers");
qpad->expected_sample_duration_n = 25;
qpad->expected_sample_duration_d = 1;
*dur_n = 25;
*dur_d = 1;
}
}
/* Called when all pads are prerolled to adjust the sample size (and, where
 * needed, the expected sample duration) used for the pad's track */
static gboolean
prefill_update_sample_size (GstQTMux * qtmux, GstQTPad * qpad)
{
switch (qpad->fourcc) {
case FOURCC_apch:
case FOURCC_apcn:
case FOURCC_apcs:
case FOURCC_apco:
case FOURCC_ap4h:
case FOURCC_ap4x:
{
guint sample_size = prefill_get_sample_size (qtmux, qpad);
atom_trak_set_constant_size_samples (qpad->trak, sample_size);
return TRUE;
}
case FOURCC_c608:
case FOURCC_c708:
{
guint sample_size = prefill_get_sample_size (qtmux, qpad);
/* We need a "valid" duration */
find_video_sample_duration (qtmux, &qpad->expected_sample_duration_n,
&qpad->expected_sample_duration_d);
atom_trak_set_constant_size_samples (qpad->trak, sample_size);
return TRUE;
}
case FOURCC_sowt:
case FOURCC_twos:{
find_video_sample_duration (qtmux, &qpad->expected_sample_duration_n,
&qpad->expected_sample_duration_d);
/* Set a prepare_buf_func that ensures this */
qpad->prepare_buf_func = prefill_raw_audio_prepare_buf_func;
qpad->raw_audio_adapter = gst_adapter_new ();
@ -4511,8 +4696,10 @@ gst_qt_mux_add_buffer (GstQTMux * qtmux, GstQTPad * pad, GstBuffer * buf)
gst_qt_mux_register_and_push_sample (qtmux, pad, empty_buf, FALSE, 1,
last_dts + scaled_duration, empty_duration_scaled,
empty_size, chunk_offset, sync, TRUE, 0);
} else {
/* our only case currently is tx3g subtitles, so there is no reason to fill this yet */
} else if (pad->fourcc != FOURCC_c608 && pad->fourcc != FOURCC_c708) {
/* This assert is kept here to make sure implementors of new
* sparse input format decide whether there needs to be special
* gap handling or not */
g_assert_not_reached ();
GST_WARNING_OBJECT (qtmux,
"no empty buffer creation function found for pad %s",
@ -5780,6 +5967,63 @@ refuse_caps:
}
}
/* setcaps handler for caption_%u pads.
 *
 * If the pad already has a fourcc the decision is delegated to
 * gst_qt_mux_can_renegotiate(). Otherwise the pad is set up as a sparse,
 * non-sync caption pad, the fourcc is chosen from the caps structure name
 * (CEA-608 or CEA-708) and the track is configured through
 * atom_trak_set_caption_type().
 *
 * Returns TRUE when the caps were accepted, FALSE when they were refused. */
static gboolean
gst_qt_mux_caption_sink_set_caps (GstQTPad * qtpad, GstCaps * caps)
{
  GstPad *pad = qtpad->collect.pad;
  GstQTMux *qtmux = GST_QT_MUX_CAST (gst_pad_get_parent (pad));
  GstStructure *s;
  guint32 caption_fourcc;
  guint32 trak_timescale;

  /* NOTE(review): the qtmux reference taken above is not released on this
   * path in this function - presumably gst_qt_mux_can_renegotiate() takes
   * care of it; confirm. */
  if (qtpad->fourcc)
    return gst_qt_mux_can_renegotiate (qtmux, pad, caps);

  GST_DEBUG_OBJECT (qtmux, "%s:%s, caps=%" GST_PTR_FORMAT,
      GST_DEBUG_PAD_NAME (pad), caps);

  /* captions default */
  qtpad->is_out_of_order = FALSE;
  qtpad->sync = FALSE;
  qtpad->sparse = TRUE;

  /* Closed caption data are within atoms */
  qtpad->prepare_buf_func = gst_qt_mux_prepare_caption_buffer;

  /* We know we only handle 608,format=cc_data and 708,format=cdp */
  s = gst_caps_get_structure (caps, 0);
  if (gst_structure_has_name (s, "closedcaption/x-cea-608")) {
    caption_fourcc = FOURCC_c608;
  } else if (gst_structure_has_name (s, "closedcaption/x-cea-708")) {
    caption_fourcc = FOURCC_c708;
  } else {
    GST_WARNING_OBJECT (qtmux, "pad %s refused caps %" GST_PTR_FORMAT,
        GST_PAD_NAME (pad), caps);
    gst_object_unref (qtmux);
    return FALSE;
  }

  /* FIXME: Get the timescale from the video track ? */
  /* Precedence: pad timescale, then the muxer-wide trak timescale, then a
   * fixed 30000 */
  trak_timescale = gst_qt_mux_pad_get_timescale (GST_QT_MUX_PAD_CAST (pad));
  if (!trak_timescale) {
    if (qtmux->trak_timescale)
      trak_timescale = qtmux->trak_timescale;
    else
      trak_timescale = 30000;
  }

  qtpad->fourcc = caption_fourcc;
  qtpad->trak_ste =
      (SampleTableEntry *) atom_trak_set_caption_type (qtpad->trak,
      qtmux->context, trak_timescale, caption_fourcc);

  gst_object_unref (qtmux);
  return TRUE;
}
static gboolean
gst_qt_mux_sink_event (GstCollectPads * pads, GstCollectData * data,
GstEvent * event, gpointer user_data)
@ -5951,6 +6195,14 @@ gst_qt_mux_request_new_pad (GstElement * element,
name = g_strdup_printf ("subtitle_%u", qtmux->subtitle_pads++);
}
lock = FALSE;
} else if (templ == gst_element_class_get_pad_template (klass, "caption_%u")) {
setcaps_func = gst_qt_mux_caption_sink_set_caps;
if (req_name != NULL && sscanf (req_name, "caption_%u", &pad_id) == 1) {
name = g_strdup (req_name);
} else {
name = g_strdup_printf ("caption_%u", qtmux->caption_pads++);
}
lock = FALSE;
} else
goto wrong_template;
@ -6245,7 +6497,7 @@ gst_qt_mux_register (GstPlugin * plugin)
while (TRUE) {
GstQTMuxFormatProp *prop;
GstCaps *subtitle_caps;
GstCaps *subtitle_caps, *caption_caps;
prop = &gst_qt_mux_format_list[i];
format = prop->format;
@ -6264,6 +6516,12 @@ gst_qt_mux_register (GstPlugin * plugin)
} else {
gst_caps_unref (subtitle_caps);
}
caption_caps = gst_static_caps_get (&prop->caption_sink_caps);
if (!gst_caps_is_equal (caption_caps, GST_CAPS_NONE)) {
params->caption_sink_caps = caption_caps;
} else {
gst_caps_unref (caption_caps);
}
/* create the type now */
type = g_type_register_static (GST_TYPE_ELEMENT, prop->type_name, &typeinfo,

View file

@ -290,7 +290,7 @@ struct _GstQTMux
gboolean reserved_prefill;
/* for request pad naming */
guint video_pads, audio_pads, subtitle_pads;
guint video_pads, audio_pads, subtitle_pads, caption_pads;
};
struct _GstQTMuxClass
@ -308,6 +308,7 @@ typedef struct _GstQTMuxClassParams
GstCaps *video_sink_caps;
GstCaps *audio_sink_caps;
GstCaps *subtitle_sink_caps;
GstCaps *caption_sink_caps;
} GstQTMuxClassParams;
#define GST_QT_MUX_PARAMS_QDATA g_quark_from_static_string("qt-mux-params")

View file

@ -166,6 +166,11 @@
"text/x-raw, " \
"format=(string)utf8"
#define CEA608_CAPS \
"closedcaption/x-cea-608, format=(string)cc_data"
#define CEA708_CAPS \
"closedcaption/x-cea-708, format=(string)cdp"
/* FIXME 0.11 - take a look at bugs #580005 and #340375 */
GstQTMuxFormatProp gst_qt_mux_format_list[] = {
/* original QuickTime format; see Apple site (e.g. qtff.pdf) */
@ -209,7 +214,8 @@ GstQTMuxFormatProp gst_qt_mux_format_list[] = {
"audio/x-alaw, " COMMON_AUDIO_CAPS (2, MAX) "; "
"audio/x-mulaw, " COMMON_AUDIO_CAPS (2, MAX) "; "
AMR_CAPS " ; " ALAC_CAPS " ; " OPUS_CAPS),
GST_STATIC_CAPS (TEXT_UTF8)}
GST_STATIC_CAPS (TEXT_UTF8),
GST_STATIC_CAPS (CEA608_CAPS "; " CEA708_CAPS)}
,
/* ISO 14496-14: mp42 as ISO base media extension
* (supersedes original ISO 144996-1 mp41) */
@ -224,7 +230,8 @@ GstQTMuxFormatProp gst_qt_mux_format_list[] = {
"video/x-mp4-part," COMMON_VIDEO_CAPS),
GST_STATIC_CAPS (MP123_CAPS "; "
AAC_CAPS " ; " AC3_CAPS " ; " ALAC_CAPS " ; " OPUS_CAPS),
GST_STATIC_CAPS (TEXT_UTF8)}
GST_STATIC_CAPS (TEXT_UTF8),
GST_STATIC_CAPS_NONE}
,
/* Microsoft Smooth Streaming fmp4/isml */
/* TODO add WMV/WMA support */
@ -237,6 +244,7 @@ GstQTMuxFormatProp gst_qt_mux_format_list[] = {
GST_STATIC_CAPS ("video/quicktime, variant = (string) iso-fragmented"),
GST_STATIC_CAPS (MPEG4V_CAPS "; " H264_CAPS),
GST_STATIC_CAPS (MP3_CAPS "; " AAC_CAPS),
GST_STATIC_CAPS_NONE,
GST_STATIC_CAPS_NONE}
,
/* 3GPP Technical Specification 26.244 V7.3.0
@ -250,7 +258,8 @@ GstQTMuxFormatProp gst_qt_mux_format_list[] = {
GST_STATIC_CAPS ("video/quicktime, variant = (string) 3gpp"),
GST_STATIC_CAPS (H263_CAPS "; " MPEG4V_CAPS "; " H264_CAPS),
GST_STATIC_CAPS (AMR_CAPS "; " MP3_CAPS "; " AAC_CAPS "; " AC3_CAPS),
GST_STATIC_CAPS (TEXT_UTF8)}
GST_STATIC_CAPS (TEXT_UTF8),
GST_STATIC_CAPS_NONE}
,
/* ISO 15444-3: Motion-JPEG-2000 (also ISO base media extension) */
{
@ -263,6 +272,7 @@ GstQTMuxFormatProp gst_qt_mux_format_list[] = {
GST_STATIC_CAPS ("image/x-j2c, " COMMON_VIDEO_CAPS "; "
"image/x-jpc, " COMMON_VIDEO_CAPS),
GST_STATIC_CAPS (PCM_CAPS),
GST_STATIC_CAPS_NONE,
GST_STATIC_CAPS_NONE}
,
{

View file

@ -70,6 +70,7 @@ typedef struct _GstQTMuxFormatProp
GstStaticCaps video_sink_caps;
GstStaticCaps audio_sink_caps;
GstStaticCaps subtitle_sink_caps;
GstStaticCaps caption_sink_caps;
} GstQTMuxFormatProp;
extern GstQTMuxFormatProp gst_qt_mux_format_list[];