From 6cada5b0644c3e456b583872d0cbd6e549dadb87 Mon Sep 17 00:00:00 2001 From: Jan Schmidt <jan@centricular.com> Date: Sat, 5 Jun 2021 03:13:52 +1000 Subject: [PATCH] qtdemux: Add support for wvtt (WebVTT) subtitles. WebVTT in ISO MP4 is specified in ISO 14496-30, and needed for DASH support. It's stored in an mp4 specific format. To handle it compatibly, the wvtt boxes are converted back into WebVTT text and pushed as application/x-subtitle-vtt Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/1182> --- .../gst-plugins-good/gst/isomp4/fourcc.h | 9 + .../gst-plugins-good/gst/isomp4/meson.build | 1 + .../gst/isomp4/qtdemux-webvtt.c | 221 ++++++++++++++++++ .../gst/isomp4/qtdemux-webvtt.h | 32 +++ .../gst-plugins-good/gst/isomp4/qtdemux.c | 71 +++++- .../gst/isomp4/qtdemux_types.c | 2 + 6 files changed, 327 insertions(+), 9 deletions(-) create mode 100644 subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.c create mode 100644 subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.h diff --git a/subprojects/gst-plugins-good/gst/isomp4/fourcc.h b/subprojects/gst-plugins-good/gst/isomp4/fourcc.h index 8872b4bda3..5be6921e7d 100644 --- a/subprojects/gst-plugins-good/gst/isomp4/fourcc.h +++ b/subprojects/gst-plugins-good/gst/isomp4/fourcc.h @@ -110,6 +110,7 @@ G_BEGIN_DECLS #define FOURCC_cprt GST_MAKE_FOURCC('c','p','r','t') #define FOURCC_crgn GST_MAKE_FOURCC('c','r','g','n') #define FOURCC_ctab GST_MAKE_FOURCC('c','t','a','b') +#define FOURCC_ctim GST_MAKE_FOURCC('c','t','i','m') #define FOURCC_ctts GST_MAKE_FOURCC('c','t','t','s') #define FOURCC_cslg GST_MAKE_FOURCC('c','s','l','g') #define FOURCC_d263 GST_MAKE_FOURCC('d','2','6','3') @@ -158,6 +159,7 @@ G_BEGIN_DECLS #define FOURCC_hnti GST_MAKE_FOURCC('h','n','t','i') #define FOURCC_hvc1 GST_MAKE_FOURCC('h','v','c','1') #define FOURCC_hvcC GST_MAKE_FOURCC('h','v','c','C') +#define FOURCC_iden GST_MAKE_FOURCC('i','d','e','n') #define FOURCC_ilst GST_MAKE_FOURCC('i','l','s','t') #define 
FOURCC_ima4 GST_MAKE_FOURCC('i','m','a','4') #define FOURCC_imap GST_MAKE_FOURCC('i','m','a','p') @@ -201,6 +203,7 @@ G_BEGIN_DECLS #define FOURCC_prof GST_MAKE_FOURCC('p','r','o','f') #define FOURCC_enof GST_MAKE_FOURCC('e','n','o','f') #define FOURCC_fiel GST_MAKE_FOURCC('f','i','e','l') +#define FOURCC_payl GST_MAKE_FOURCC('p','a','y','l') #define FOURCC_pcst GST_MAKE_FOURCC('p','c','s','t') #define FOURCC_pgap GST_MAKE_FOURCC('p','g','a','p') #define FOURCC_png GST_MAKE_FOURCC('p','n','g',' ') @@ -242,6 +245,7 @@ G_BEGIN_DECLS #define FOURCC_stsd GST_MAKE_FOURCC('s','t','s','d') #define FOURCC_stss GST_MAKE_FOURCC('s','t','s','s') #define FOURCC_stsz GST_MAKE_FOURCC('s','t','s','z') +#define FOURCC_sttg GST_MAKE_FOURCC('s','t','t','g') #define FOURCC_stts GST_MAKE_FOURCC('s','t','t','s') #define FOURCC_styp GST_MAKE_FOURCC('s','t','y','p') #define FOURCC_subp GST_MAKE_FOURCC('s','u','b','p') @@ -271,9 +275,14 @@ G_BEGIN_DECLS #define FOURCC_vp08 GST_MAKE_FOURCC('v','p','0','8') #define FOURCC_vp09 GST_MAKE_FOURCC('v','p','0','9') #define FOURCC_vpcC GST_MAKE_FOURCC('v','p','c','C') +#define FOURCC_vtta GST_MAKE_FOURCC('v','t','t','a') +#define FOURCC_vttc GST_MAKE_FOURCC('v','t','t','c') +#define FOURCC_vttC GST_MAKE_FOURCC('v','t','t','C') +#define FOURCC_vtte GST_MAKE_FOURCC('v','t','t','e') #define FOURCC_xvid GST_MAKE_FOURCC('x','v','i','d') #define FOURCC_wave GST_MAKE_FOURCC('w','a','v','e') #define FOURCC_wide GST_MAKE_FOURCC('w','i','d','e') +#define FOURCC_wvtt GST_MAKE_FOURCC('w','v','t','t') #define FOURCC_zlib GST_MAKE_FOURCC('z','l','i','b') #define FOURCC_lpcm GST_MAKE_FOURCC('l','p','c','m') #define FOURCC_av01 GST_MAKE_FOURCC('a','v','0','1') diff --git a/subprojects/gst-plugins-good/gst/isomp4/meson.build b/subprojects/gst-plugins-good/gst/isomp4/meson.build index b510c0510e..b10c09c406 100644 --- a/subprojects/gst-plugins-good/gst/isomp4/meson.build +++ b/subprojects/gst-plugins-good/gst/isomp4/meson.build @@ -8,6 +8,7 @@ mp4_sources = [ 
'qtdemux_lang.c', 'qtdemux_tags.c', 'qtdemux_tree.c', + 'qtdemux-webvtt.c', 'gstisoff.c', 'gstqtmux.c', 'gstqtmoovrecover.c', diff --git a/subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.c b/subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.c new file mode 100644 index 0000000000..ada3d4d177 --- /dev/null +++ b/subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.c @@ -0,0 +1,221 @@ +/* GStreamer + * Copyright (C) 2008 Thijs Vermeir <thijsvermeir@gmail.com> + * Copyright (C) 2011 David Schleef <ds@schleef.org> + * Copyright (C) 2021 Jan Schmidt <jan@centricular.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "qtdemux-webvtt.h"
+#include <gst/base/gstbytereader.h>
+
+#include "fourcc.h"
+#include "qtdemux.h"
+#include "qtatomparser.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+GST_DEBUG_CATEGORY_EXTERN (qtdemux_debug);
+#define GST_CAT_DEFAULT qtdemux_debug
+
+/* Returns TRUE if the first box of the sample in @data is a 'vtte' (empty
+ * cue) box. @demux is not used. */
+gboolean
+qtdemux_webvtt_is_empty (GstQTDemux * demux, guint8 * data, gsize size)
+{
+  GstByteReader br;
+  guint32 atom_size;
+  guint32 atom_type;
+
+  gst_byte_reader_init (&br, data, size);
+  /* Both getters return FALSE when the box header is truncated */
+  if (!gst_byte_reader_get_uint32_be (&br, &atom_size) ||
+      !qt_atom_parser_get_fourcc (&br, &atom_type))
+    return FALSE;
+
+  return atom_type == FOURCC_vtte;
+}
+
+/* Payloads of the children of one 'vttc' box; they point into the sample */
+struct WebvttCue
+{
+  const guint8 *cue_id;         /* 'iden' payload */
+  guint32 cue_id_len;
+
+  const guint8 *cue_time;       /* 'ctim' payload, recorded but not output */
+  guint32 cue_time_len;
+
+  const guint8 *settings;       /* 'sttg' payload */
+  guint32 settings_len;
+
+  const guint8 *cue_text;       /* 'payl' payload */
+  guint32 cue_text_len;
+};
+
+/* Appends @timestamp to @str formatted as hh:mm:ss.mmm */
+static void
+webvtt_append_timestamp_to_string (GstClockTime timestamp, GString * str)
+{
+  guint h, m, s, ms;
+
+  /* Modulo keeps every intermediate value in 64 bits */
+  h = timestamp / (3600 * GST_SECOND);
+  timestamp %= 3600 * GST_SECOND;
+  m = timestamp / (60 * GST_SECOND);
+  timestamp %= 60 * GST_SECOND;
+  s = timestamp / GST_SECOND;
+  ms = (timestamp % GST_SECOND) / GST_MSECOND;
+
+  g_string_append_printf (str, "%02u:%02u:%02u.%03u", h, m, s, ms);
+}
+
+/* Appends the cue built from the children of one 'vttc' box to @s. @br must
+ * span only the payload of that box. Timing comes from the sample (@start,
+ * @duration). Returns TRUE if a cue was written, which needs 'payl'. */
+static gboolean
+webvtt_decode_vttc (GstQTDemux * qtdemux, GstByteReader * br,
+    GstClockTime start, GstClockTime duration, GString * s)
+{
+  struct WebvttCue cue = { 0, };
+  gboolean have_data = FALSE;
+
+  while (gst_byte_reader_get_remaining (br) >= 8) {
+    guint32 atom_size, atom_type;
+    guint next_pos;
+
+    if (!gst_byte_reader_get_uint32_be (br, &atom_size) ||
+        !qt_atom_parser_get_fourcc (br, &atom_type))
+      break;
+    /* Sizes below the header length (including 0 and 1) are not supported */
+    if (atom_size < 8 || gst_byte_reader_get_remaining (br) < atom_size - 8)
+      break;
+    next_pos = gst_byte_reader_get_pos (br) - 8 + atom_size;
+
+    GST_LOG_OBJECT (qtdemux, "WebVTT cue atom %" GST_FOURCC_FORMAT " len %u",
+        GST_FOURCC_ARGS (atom_type), atom_size);
+
+    switch (atom_type) {
+      case FOURCC_ctim:
+        if (!gst_byte_reader_get_data (br, atom_size - 8, &cue.cue_time))
+          return FALSE;
+        cue.cue_time_len = atom_size - 8;
+        break;
+      case FOURCC_iden:
+        if (!gst_byte_reader_get_data (br, atom_size - 8, &cue.cue_id))
+          return FALSE;
+        cue.cue_id_len = atom_size - 8;
+        break;
+      case FOURCC_sttg:
+        if (!gst_byte_reader_get_data (br, atom_size - 8, &cue.settings))
+          return FALSE;
+        cue.settings_len = atom_size - 8;
+        break;
+      case FOURCC_payl:
+        if (!gst_byte_reader_get_data (br, atom_size - 8, &cue.cue_text))
+          return FALSE;
+        cue.cue_text_len = atom_size - 8;
+        have_data = TRUE;
+        break;
+      default:
+        break;
+    }
+
+    if (!gst_byte_reader_set_pos (br, next_pos))
+      break;
+  }
+
+  if (have_data) {
+    if (cue.cue_id_len > 0)
+      g_string_append_printf (s, "%.*s\n", (gint) cue.cue_id_len, cue.cue_id);
+
+    /* Write the cue time and optional settings. An invalid duration must not
+     * wrap the end time around, so the end falls back to the start time */
+    webvtt_append_timestamp_to_string (start, s);
+    g_string_append (s, " --> ");
+    if (GST_CLOCK_TIME_IS_VALID (duration))
+      start += duration;
+    webvtt_append_timestamp_to_string (start, s);
+
+    if (cue.settings_len > 0)
+      g_string_append_printf (s, " %.*s", (gint) cue.settings_len,
+          cue.settings);
+    g_string_append_printf (s, "\n%.*s\n\n", (gint) cue.cue_text_len,
+        cue.cue_text);
+  }
+
+  return have_data;
+}
+
+/* Converts the 'vttc' boxes of the sample in @data to WebVTT text timed with
+ * @start and @duration. Returns a new buffer, or NULL if there is no 'vttc'. */
+GstBuffer *
+qtdemux_webvtt_decode (GstQTDemux * qtdemux, GstClockTime start,
+    GstClockTime duration, guint8 * data, gsize size)
+{
+  GstByteReader br;
+  GString *str = NULL;
+  GstBuffer *buf = NULL;
+
+  gst_byte_reader_init (&br, data, size);
+  while (gst_byte_reader_get_remaining (&br) >= 8) {
+    GstByteReader cue_br;
+    guint32 atom_size, atom_type;
+    guint next_pos;
+
+    if (!gst_byte_reader_get_uint32_be (&br, &atom_size) ||
+        !qt_atom_parser_get_fourcc (&br, &atom_type))
+      break;
+    if (atom_size < 8 || gst_byte_reader_get_remaining (&br) < atom_size - 8)
+      break;
+    next_pos = gst_byte_reader_get_pos (&br) - 8 + atom_size;
+
+    switch (atom_type) {
+      case FOURCC_vttc:
+        GST_LOG_OBJECT (qtdemux, "WebVTT cue atom %" GST_FOURCC_FORMAT
+            " len %u", GST_FOURCC_ARGS (atom_type), atom_size);
+        if (str == NULL)
+          str = g_string_new (NULL);
+        /* Confine parsing to this box: sibling boxes are not its children */
+        if (gst_byte_reader_peek_sub_reader (&br, &cue_br, atom_size - 8))
+          webvtt_decode_vttc (qtdemux, &cue_br, start, duration, str);
+        break;
+      case FOURCC_vtte:
+      case FOURCC_vtta:
+        /* Empty cue (see qtdemux_webvtt_is_empty()) and extra attributes */
+        break;
+      default:
+        GST_DEBUG_OBJECT (qtdemux,
+            "Unknown WebVTT sample atom %" GST_FOURCC_FORMAT,
+            GST_FOURCC_ARGS (atom_type));
+        break;
+    }
+    if (!gst_byte_reader_set_pos (&br, next_pos))
+      break;
+  }
+
+  if (str) {
+    gsize webvtt_len = str->len;
+    gchar *webvtt_chunk = g_string_free (str, FALSE);
+    buf = gst_buffer_new_wrapped (webvtt_chunk, webvtt_len);
+  }
+
+  return buf;
+}
diff --git a/subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.h b/subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.h
new file mode 100644
index 0000000000..d411c95092
--- /dev/null
+++ b/subprojects/gst-plugins-good/gst/isomp4/qtdemux-webvtt.h
@@ -0,0 +1,32 @@
+/* GStreamer
+ * Copyright (C) <2021> Jan Schmidt <jan@centricular.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ */
+#ifndef __QTDEMUX_WEBVTT_H__
+#define __QTDEMUX_WEBVTT_H__
+
+#include <gst/gst.h>
+#include "qtdemux.h"
+
+G_BEGIN_DECLS
+
+gboolean qtdemux_webvtt_is_empty(GstQTDemux *demux, guint8 *data, gsize size);
+GstBuffer *qtdemux_webvtt_decode (GstQTDemux * qtdemux, GstClockTime start, GstClockTime duration, guint8 *data, gsize size);
+
+G_END_DECLS
+
+#endif
diff --git a/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c b/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c
index 49f8ae4581..cb20edf08f 100644
--- a/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c
+++ b/subprojects/gst-plugins-good/gst/isomp4/qtdemux.c
@@ -70,6 +70,7 @@
 #include "qtpalette.h"
 #include "qtdemux_tags.h"
 #include "qtdemux_tree.h"
+#include "qtdemux-webvtt.h"
 
 #include <stdlib.h>
 #include <string.h>
@@ -5775,6 +5776,53 @@ gst_qtdemux_process_buffer_text (GstQTDemux * qtdemux, QtDemuxStream * stream,
   return buf;
 }
 
+/* WebVTT sample handling according to 14496-30.
+ *
+ * Takes ownership of @buf. A sample starting with an empty-cue box is turned
+ * into a gap event on the stream pad and no buffer is returned; any other
+ * sample is converted to WebVTT text carrying the metadata of @buf. Returns
+ * NULL when there is nothing to push downstream. */
+static GstBuffer *
+gst_qtdemux_process_buffer_wvtt (GstQTDemux * qtdemux, QtDemuxStream * stream,
+    GstBuffer * buf)
+{
+  GstBuffer *outbuf = NULL;
+  GstMapInfo map;
+
+  if (!gst_buffer_map (buf, &map, GST_MAP_READ)) {
+    /* Do not rely on g_assert_not_reached() here: it can be compiled out,
+     * and the uninitialised map would then be parsed below */
+    GST_WARNING_OBJECT (qtdemux, "Failed to map WebVTT sample, dropping it");
+    gst_buffer_unref (buf);
+    return NULL;
+  }
+
+  if (qtdemux_webvtt_is_empty (qtdemux, map.data, map.size)) {
+    GstEvent *gap = NULL;
+    /* Push a gap event */
+    stream->segment.position = GST_BUFFER_PTS (buf);
+    gap =
+        gst_event_new_gap (stream->segment.position, GST_BUFFER_DURATION (buf));
+    gst_pad_push_event (stream->pad, gap);
+
+    if (GST_BUFFER_DURATION_IS_VALID (buf))
+      stream->segment.position += GST_BUFFER_DURATION (buf);
+  } else {
+    outbuf =
+        qtdemux_webvtt_decode (qtdemux, GST_BUFFER_PTS (buf),
+        GST_BUFFER_DURATION (buf), map.data, map.size);
+    /* The decoder returns NULL when the sample holds no cue box, in which
+     * case there is no output buffer to copy the metadata onto */
+    if (outbuf != NULL)
+      gst_buffer_copy_into (outbuf, buf, GST_BUFFER_COPY_METADATA, 0, -1);
+  }
+
+  gst_buffer_unmap (buf, &map);
+  gst_buffer_unref (buf);
+
+  return outbuf;
+}
+
 static GstFlowReturn
 gst_qtdemux_push_buffer (GstQTDemux * qtdemux, QtDemuxStream
* stream, GstBuffer * buf) @@ -6071,6 +6107,12 @@ gst_qtdemux_decorate_and_push_buffer (GstQTDemux * qtdemux, /* we're going to modify the metadata */ buf = gst_buffer_make_writable (buf); + GST_BUFFER_DTS (buf) = dts; + GST_BUFFER_PTS (buf) = pts; + GST_BUFFER_DURATION (buf) = duration; + GST_BUFFER_OFFSET (buf) = -1; + GST_BUFFER_OFFSET_END (buf) = -1; + if (G_UNLIKELY (stream->process_func)) buf = stream->process_func (qtdemux, stream, buf); @@ -6078,12 +6120,6 @@ gst_qtdemux_decorate_and_push_buffer (GstQTDemux * qtdemux, goto exit; } - GST_BUFFER_DTS (buf) = dts; - GST_BUFFER_PTS (buf) = pts; - GST_BUFFER_DURATION (buf) = duration; - GST_BUFFER_OFFSET (buf) = -1; - GST_BUFFER_OFFSET_END (buf) = -1; - if (!keyframe) { GST_BUFFER_FLAG_SET (buf, GST_BUFFER_FLAG_DELTA_UNIT); stream->on_keyframe = FALSE; @@ -6312,7 +6348,8 @@ gst_qtdemux_loop_state_movie (GstQTDemux * qtdemux) /* Only send gap events on non-subtitle streams if lagging way behind. */ if (stream->subtype == FOURCC_subp - || stream->subtype == FOURCC_text || stream->subtype == FOURCC_sbtl) + || stream->subtype == FOURCC_text || stream->subtype == FOURCC_sbtl || + stream->subtype == FOURCC_wvtt) gap_threshold = 1 * GST_SECOND; else gap_threshold = 3 * GST_SECOND; @@ -8854,7 +8891,7 @@ gst_qtdemux_add_stream (GstQTDemux * qtdemux, GST_DEBUG_OBJECT (qtdemux, "stream type, not creating pad"); } else if (stream->subtype == FOURCC_subp || stream->subtype == FOURCC_text || stream->subtype == FOURCC_sbtl || stream->subtype == FOURCC_subt - || stream->subtype == FOURCC_clcp) { + || stream->subtype == FOURCC_clcp || stream->subtype == FOURCC_wvtt) { gchar *name = g_strdup_printf ("subtitle_%u", qtdemux->n_sub_streams); stream->pad = @@ -12829,7 +12866,7 @@ qtdemux_parse_trak (GstQTDemux * qtdemux, GNode * trak) entry->sampled = TRUE; } else if (stream->subtype == FOURCC_subp || stream->subtype == FOURCC_text || stream->subtype == FOURCC_sbtl || stream->subtype == FOURCC_subt - || stream->subtype == FOURCC_clcp) 
{ + || stream->subtype == FOURCC_clcp || stream->subtype == FOURCC_wvtt) { entry->sampled = TRUE; entry->sparse = TRUE; @@ -14991,6 +15028,22 @@ qtdemux_sub_caps (GstQTDemux * qtdemux, QtDemuxStream * stream, _codec ("XML subtitles"); caps = gst_caps_new_empty_simple ("application/ttml+xml"); break; + case FOURCC_wvtt: + { + GstBuffer *buffer; + const gchar *buf = "WEBVTT\n\n"; + + _codec ("WebVTT subtitles"); + caps = gst_caps_new_empty_simple ("application/x-subtitle-vtt"); + stream->process_func = gst_qtdemux_process_buffer_wvtt; + + /* FIXME: Parse the vttC atom and get the entire WEBVTT header */ + buffer = gst_buffer_new_and_alloc (8); + gst_buffer_fill (buffer, 0, buf, 8); + stream->buffers = g_slist_append (stream->buffers, buffer); + + break; + } case FOURCC_c608: _codec ("CEA 608 Closed Caption"); caps = diff --git a/subprojects/gst-plugins-good/gst/isomp4/qtdemux_types.c b/subprojects/gst-plugins-good/gst/isomp4/qtdemux_types.c index 15ad3e5e8b..3c2c18855c 100644 --- a/subprojects/gst-plugins-good/gst/isomp4/qtdemux_types.c +++ b/subprojects/gst-plugins-good/gst/isomp4/qtdemux_types.c @@ -218,6 +218,7 @@ static const QtNodeType qt_node_types[] = { {FOURCC_pssh, "protection system specific header", 0}, {FOURCC_tenc, "track encryption", 0}, {FOURCC_stpp, "XML subtitle sample entry", 0}, + {FOURCC_wvtt, "WebVTT subtitle sample entry", 0}, {FOURCC_clcp, "Closed Caption", 0}, {FOURCC_av01, "AV1 Sample Entry", 0}, {FOURCC_av1C, "AV1 Codec Configuration", 0}, @@ -227,6 +228,7 @@ static const QtNodeType qt_node_types[] = { {FOURCC_av1M, "AV1 Metadata sample group entry", 0}, {FOURCC_aavd, "AAX encrypted audio", 0}, {FOURCC_adrm, "AAX DRM key data", 0}, + {FOURCC_vttc, "VTTCueBox 14496-30", QT_FLAG_CONTAINER}, {0, "unknown", 0,}, };