subparse: allow per feature registration

Split the plugin into features, including
elements and device providers, which
can be individually registered during
a static build.

More details here:
https://gitlab.freedesktop.org/gstreamer/gst-build/-/merge_requests/199
https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/661

Part-of: <https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/-/merge_requests/900>
This commit is contained in:
Stéphane Cerveau 2020-11-30 11:00:30 +01:00 committed by GStreamer Merge Bot
parent d58cf8b8d3
commit b6f8d0544c
7 changed files with 553 additions and 423 deletions

View file

@ -28,8 +28,11 @@
#include <string.h>
#include "gstssaparse.h"
#include "gstsubparseelements.h"
GST_DEBUG_CATEGORY_STATIC (ssa_parse_debug);
#undef GST_CAT_DEFAULT
#define GST_CAT_DEFAULT ssa_parse_debug
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
@ -46,6 +49,9 @@ static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
#define gst_ssa_parse_parent_class parent_class
G_DEFINE_TYPE (GstSsaParse, gst_ssa_parse, GST_TYPE_ELEMENT);
/* Define the per-feature registration entry point for the "ssaparse" element
 * (rank GST_RANK_PRIMARY, type GST_TYPE_SSA_PARSE). The extra code argument
 * calls sub_parse_element_init (plugin), the plugin-wide one-time setup.
 * NOTE(review): presumably the macro runs that code before registering the
 * element - confirm against the GstElement registration macro docs. */
GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (ssaparse, "ssaparse",
GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE, sub_parse_element_init (plugin));
static GstStateChangeReturn gst_ssa_parse_change_state (GstElement *
element, GstStateChange transition);

View file

@ -32,13 +32,13 @@
#include <glib.h>
#include "gstsubparse.h"
#include "gstssaparse.h"
#include "samiparse.h"
#include "tmplayerparse.h"
#include "mpl2parse.h"
#include "qttextparse.h"
GST_DEBUG_CATEGORY (sub_parse_debug);
#include "gstsubparseelements.h"
#define DEFAULT_ENCODING NULL
#define ATTRIBUTE_REGEX "\\s?[a-zA-Z0-9\\. \t\\(\\)]*"
@ -93,8 +93,11 @@ static GstFlowReturn gst_sub_parse_chain (GstPad * sinkpad, GstObject * parent,
#define gst_sub_parse_parent_class parent_class
G_DEFINE_TYPE (GstSubParse, gst_sub_parse, GST_TYPE_ELEMENT);
static void
gst_sub_parse_dispose (GObject * object)
/* Define the per-feature registration entry point for the "subparse" element
 * (rank GST_RANK_PRIMARY, type GST_TYPE_SUBPARSE). The extra code argument
 * calls sub_parse_element_init (plugin), the plugin-wide one-time setup.
 * NOTE(review): presumably the macro runs that code before registering the
 * element - confirm against the GstElement registration macro docs. */
GST_ELEMENT_REGISTER_DEFINE_WITH_CODE (subparse, "subparse",
GST_RANK_PRIMARY, GST_TYPE_SUBPARSE, sub_parse_element_init (plugin))
static void gst_sub_parse_dispose (GObject * object)
{
GstSubParse *subparse = GST_SUBPARSE (object);
@ -392,52 +395,9 @@ gst_sub_parse_get_format_description (GstSubParseFormat format)
return NULL;
}
/*
 * Convert @len bytes of @str from @encoding to UTF-8.
 *
 * "*" is handed to g_convert_with_fallback() as the fallback string.
 * *@consumed is reset to 0 before the conversion and then filled in by
 * g_convert_with_fallback(); @err receives any conversion error.
 *
 * Returns the converted string with a leading UTF-8 BOM removed, or NULL if
 * the conversion failed. Callers release the result with g_free().
 */
static gchar *
gst_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
    gsize * consumed, GError ** err)
{
  gchar *utf8;
  gsize utf8_len;

  *consumed = 0;

  /* The char cast is necessary in glib < 2.24 */
  utf8 = g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
      consumed, NULL, err);
  if (utf8 == NULL)
    return NULL;

  /* Skip the UTF-8 BOM (EF BB BF) if it was added; the move also carries the
   * terminating NUL along. */
  utf8_len = strlen (utf8);
  if (utf8_len >= 3 && memcmp (utf8, "\xEF\xBB\xBF", 3) == 0)
    memmove (utf8, utf8 + 3, utf8_len - 3 + 1);

  return utf8;
}
/*
 * Guess the text encoding of @str (@len bytes) from a leading byte order mark.
 *
 * Returns a newly allocated encoding name ("UTF-8", "UTF-16BE", "UTF-16LE",
 * "UTF-32BE" or "UTF-32LE"), or NULL when no known BOM is present. The caller
 * frees the result with g_free().
 */
static gchar *
detect_encoding (const gchar * str, gsize len)
{
  const guint8 *bom = (const guint8 *) str;

  /* The 4-byte BOMs must be tested first: the UTF-32LE BOM (FF FE 00 00)
   * starts with the UTF-16LE BOM (FF FE), so testing UTF-16LE first would
   * make UTF-32LE undetectable. */
  if (len >= 4) {
    if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFE && bom[3] == 0xFF)
      return g_strdup ("UTF-32BE");
    if (bom[0] == 0xFF && bom[1] == 0xFE && bom[2] == 0x00 && bom[3] == 0x00)
      return g_strdup ("UTF-32LE");
  }

  if (len >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
    return g_strdup ("UTF-8");

  if (len >= 2) {
    if (bom[0] == 0xFE && bom[1] == 0xFF)
      return g_strdup ("UTF-16BE");
    if (bom[0] == 0xFF && bom[1] == 0xFE)
      return g_strdup ("UTF-16LE");
  }

  return NULL;
}
static gchar *
convert_encoding (GstSubParse * self, const gchar * str, gsize len,
@ -452,7 +412,8 @@ convert_encoding (GstSubParse * self, const gchar * str, gsize len,
/* First try any detected encoding */
if (self->detected_encoding) {
ret =
gst_convert_to_utf8 (str, len, self->detected_encoding, consumed, &err);
gst_sub_parse_gst_convert_to_utf8 (str, len, self->detected_encoding,
consumed, &err);
if (!err)
return ret;
@ -488,7 +449,7 @@ convert_encoding (GstSubParse * self, const gchar * str, gsize len,
}
}
ret = gst_convert_to_utf8 (str, len, encoding, consumed, &err);
ret = gst_sub_parse_gst_convert_to_utf8 (str, len, encoding, consumed, &err);
if (err) {
GST_WARNING_OBJECT (self, "could not convert string from '%s' to UTF-8: %s",
@ -496,7 +457,9 @@ convert_encoding (GstSubParse * self, const gchar * str, gsize len,
g_clear_error (&err);
/* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */
ret = gst_convert_to_utf8 (str, len, "ISO-8859-15", consumed, NULL);
ret =
gst_sub_parse_gst_convert_to_utf8 (str, len, "ISO-8859-15", consumed,
NULL);
}
GST_LOG_OBJECT (self,
@ -1416,184 +1379,7 @@ parser_state_dispose (GstSubParse * self, ParserState * state)
state->allowed_tags = NULL;
}
/* regex type enum
 *
 * Identifies which format-detection regular expression
 * gst_sub_parse_data_format_autodetect_regex_once() should compile.
 * GST_SUB_PARSE_REGEX_UNKNOWN has no regex associated with it. */
typedef enum
{
GST_SUB_PARSE_REGEX_UNKNOWN = 0,
GST_SUB_PARSE_REGEX_MDVDSUB = 1,
GST_SUB_PARSE_REGEX_SUBRIP = 2,
GST_SUB_PARSE_REGEX_DKS = 3,
GST_SUB_PARSE_REGEX_VTT = 4,
} GstSubParseRegex;
/*
 * g_once() initializer that compiles one of the format-detection regexes.
 *
 * @data carries a GstSubParseRegex value packed into a pointer. The function
 * has the exact GThreadFunc signature, so calling it through g_once() no
 * longer goes through an incompatible function pointer type.
 *
 * Returns the compiled GRegex as a gpointer, or NULL if compilation failed or
 * the type is unknown (a warning is logged in both cases).
 */
static gpointer
gst_sub_parse_data_format_autodetect_regex_once (gpointer data)
{
  GstSubParseRegex regtype = (GstSubParseRegex) GPOINTER_TO_UINT (data);
  gpointer result = NULL;
  GError *gerr = NULL;

  switch (regtype) {
    case GST_SUB_PARSE_REGEX_MDVDSUB:
      result =
          (gpointer) g_regex_new ("^\\{[0-9]+\\}\\{[0-9]+\\}",
          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
      if (result == NULL) {
        g_warning ("Compilation of mdvd regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    case GST_SUB_PARSE_REGEX_SUBRIP:
      result = (gpointer)
          g_regex_new ("^[\\s\\n]*[\\n]? {0,3}[ 0-9]{1,4}\\s*(\x0d)?\x0a"
          " ?[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,3}"
          " +--> +[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,2}",
          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
      if (result == NULL) {
        g_warning ("Compilation of subrip regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    case GST_SUB_PARSE_REGEX_DKS:
      result = (gpointer) g_regex_new ("^\\[[0-9]+:[0-9]+:[0-9]+\\].*",
          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
      if (result == NULL) {
        g_warning ("Compilation of dks regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    case GST_SUB_PARSE_REGEX_VTT:
      result = (gpointer)
          g_regex_new ("^(\\xef\\xbb\\xbf)?WEBVTT[\\xa\\xd\\x20\\x9]", 0, 0,
          &gerr);
      if (result == NULL) {
        g_warning ("Compilation of vtt regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    default:
      GST_WARNING ("Trying to allocate regex of unknown type %u",
          (guint) regtype);
      break;
  }

  return result;
}
/*
 * Guess the subtitle format from the start of a file.
 *
 * @match_str must be a NUL-terminated string. The checks run in a fixed
 * order (regex based formats first, then marker strings, then sscanf()
 * patterns, finally the LRC line check) and the first match wins.
 *
 * Returns the detected GstSubParseFormat, or GST_SUB_PARSE_FORMAT_UNKNOWN.
 *
 * FIXME: maybe we should pass along a second argument, the preceding
 * text buffer, because that is how this originally worked, even though
 * I don't really see the use of that.
 */
static GstSubParseFormat
gst_sub_parse_data_format_autodetect (gchar * match_str)
{
  guint n1, n2, n3;

  /* each regex is compiled once and then shared by every call */
  static GOnce mdvd_rx_once = G_ONCE_INIT;
  static GOnce subrip_rx_once = G_ONCE_INIT;
  static GOnce dks_rx_once = G_ONCE_INIT;
  static GOnce vtt_rx_once = G_ONCE_INIT;

  GRegex *mdvd_grx;
  GRegex *subrip_grx;
  GRegex *dks_grx;
  GRegex *vtt_grx;

  g_once (&mdvd_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_MDVDSUB);
  g_once (&subrip_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_SUBRIP);
  g_once (&dks_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_DKS);
  g_once (&vtt_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_VTT);

  mdvd_grx = (GRegex *) mdvd_rx_once.retval;
  subrip_grx = (GRegex *) subrip_rx_once.retval;
  dks_grx = (GRegex *) dks_rx_once.retval;
  vtt_grx = (GRegex *) vtt_rx_once.retval;

  if (g_regex_match (mdvd_grx, match_str, 0, NULL)) {
    GST_LOG ("MicroDVD (frame based) format detected");
    return GST_SUB_PARSE_FORMAT_MDVDSUB;
  }
  if (g_regex_match (subrip_grx, match_str, 0, NULL)) {
    GST_LOG ("SubRip (time based) format detected");
    return GST_SUB_PARSE_FORMAT_SUBRIP;
  }
  if (g_regex_match (dks_grx, match_str, 0, NULL)) {
    GST_LOG ("DKS (time based) format detected");
    return GST_SUB_PARSE_FORMAT_DKS;
  }
  if (g_regex_match (vtt_grx, match_str, 0, NULL) == TRUE) {
    GST_LOG ("WebVTT (time based) format detected");
    return GST_SUB_PARSE_FORMAT_VTT;
  }

  if (!strncmp (match_str, "FORMAT=TIME", 11)) {
    GST_LOG ("MPSub (time based) format detected");
    return GST_SUB_PARSE_FORMAT_MPSUB;
  }
  if (strstr (match_str, "<SAMI>") != NULL ||
      strstr (match_str, "<sami>") != NULL) {
    GST_LOG ("SAMI (time based) format detected");
    return GST_SUB_PARSE_FORMAT_SAMI;
  }
  /* we're boldly assuming the first subtitle appears within the first hour */
  if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 ||
      sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 ||
      sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 ||
      sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 ||
      sscanf (match_str, "00:%02u:%02u,%u=", &n1, &n2, &n3) == 3) {
    GST_LOG ("TMPlayer (time based) format detected");
    return GST_SUB_PARSE_FORMAT_TMPLAYER;
  }
  if (sscanf (match_str, "[%u][%u]", &n1, &n2) == 2) {
    GST_LOG ("MPL2 (time based) format detected");
    return GST_SUB_PARSE_FORMAT_MPL2;
  }
  if (strstr (match_str, "[INFORMATION]") != NULL) {
    GST_LOG ("SubViewer (time based) format detected");
    return GST_SUB_PARSE_FORMAT_SUBVIEWER;
  }
  if (strstr (match_str, "{QTtext}") != NULL) {
    GST_LOG ("QTtext (time based) format detected");
    return GST_SUB_PARSE_FORMAT_QTTEXT;
  }
  /* We assume the LRC file starts immediately */
  if (match_str[0] == '[') {
    gboolean all_lines_good = TRUE;
    gchar **split;
    gchar **ptr;

    ptr = split = g_strsplit (match_str, "\n", -1);
    /* the last piece is left out of the check */
    while (*ptr && *(ptr + 1)) {
      gchar *str = *ptr;
      gsize len = strlen (str);

      if (sscanf (str, "[%u:%02u.%02u]", &n1, &n2, &n3) == 3 ||
          sscanf (str, "[%u:%02u.%03u]", &n1, &n2, &n3) == 3) {
        all_lines_good = TRUE;
      } else if (len > 0 && str[len - 1] == ']' && strchr (str, ':') != NULL) {
        /* len > 0: an empty line must not make us read str[-1] */
        all_lines_good = TRUE;
      } else {
        all_lines_good = FALSE;
        break;
      }
      ptr++;
    }
    g_strfreev (split);

    if (all_lines_good)
      return GST_SUB_PARSE_FORMAT_LRC;
  }

  GST_DEBUG ("no subtitle format detected");
  return GST_SUB_PARSE_FORMAT_UNKNOWN;
}
static GstCaps *
gst_sub_parse_format_autodetect (GstSubParse * self)
@ -1823,7 +1609,8 @@ handle_buffer (GstSubParse * self, GstBuffer * buf)
GstMapInfo map;
gst_buffer_map (buf, &map, GST_MAP_READ);
self->detected_encoding = detect_encoding ((gchar *) map.data, map.size);
self->detected_encoding =
gst_sub_parse_detect_encoding ((gchar *) map.data, map.size);
gst_buffer_unmap (buf, &map);
self->first_buffer = FALSE;
self->state.fps_n = self->fps_n;
@ -2081,180 +1868,3 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition)
return ret;
}
/*
* Typefind support.
*/
/* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so;
* also, give different subtitle formats really different types */
/* One static caps object per subtitle flavour, each directly followed by the
 * macro that fetches a GstCaps from it via gst_static_caps_get(). */
static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
#define SUB_CAPS (gst_static_caps_get (&sub_caps))

static GstStaticCaps mpl2_caps =
GST_STATIC_CAPS ("application/x-subtitle-mpl2");
#define MPL2_CAPS (gst_static_caps_get (&mpl2_caps))

static GstStaticCaps tmp_caps =
GST_STATIC_CAPS ("application/x-subtitle-tmplayer");
#define TMP_CAPS (gst_static_caps_get (&tmp_caps))

static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
#define SAMI_CAPS (gst_static_caps_get (&smi_caps))

static GstStaticCaps dks_caps = GST_STATIC_CAPS ("application/x-subtitle-dks");
#define DKS_CAPS (gst_static_caps_get (&dks_caps))

static GstStaticCaps vtt_caps = GST_STATIC_CAPS ("application/x-subtitle-vtt");
#define VTT_CAPS (gst_static_caps_get (&vtt_caps))

static GstStaticCaps qttext_caps =
GST_STATIC_CAPS ("application/x-subtitle-qttext");
#define QTTEXT_CAPS (gst_static_caps_get (&qttext_caps))

static GstStaticCaps lrc_caps = GST_STATIC_CAPS ("application/x-subtitle-lrc");
#define LRC_CAPS (gst_static_caps_get (&lrc_caps))
/*
 * Typefind callback for subtitle files.
 *
 * Peeks at the start of the stream, converts it to UTF-8 (using a BOM-detected
 * encoding first, then the GST_SUBTITLE_ENCODING / locale fallback), runs
 * gst_sub_parse_data_format_autodetect() on the text and, when a format is
 * recognised, suggests the matching caps with GST_TYPE_FIND_MAXIMUM.
 *
 * @tf: the typefind context to peek from and suggest caps on
 * @private: unused
 */
static void
gst_subparse_type_find (GstTypeFind * tf, gpointer private)
{
  GstSubParseFormat format;
  const guint8 *data;
  GstCaps *caps;
  gchar *str;
  /* number of valid bytes in str, not counting the terminating NUL */
  gsize str_len = 128;
  gchar *encoding = NULL;
  const gchar *end;

  if (!(data = gst_type_find_peek (tf, 0, 129)))
    return;

  /* make sure string passed to _autodetect() is NUL-terminated */
  str = g_malloc0 (129);
  memcpy (str, data, 128);

  if ((encoding = detect_encoding (str, str_len)) != NULL) {
    gchar *converted_str;
    GError *err = NULL;
    gsize tmp;

    converted_str = gst_convert_to_utf8 (str, str_len, encoding, &tmp, &err);
    if (converted_str == NULL) {
      GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding,
          err->message);
      g_clear_error (&err);
    } else {
      g_free (str);
      str = converted_str;
      /* The converted buffer has its own length; continuing to treat it as
       * 128 bytes long would read past its end when it is shorter. */
      str_len = strlen (str);
    }
    g_free (encoding);
  }

  /* Check if at least the first 120 chars are valid UTF8,
   * otherwise convert as always */
  if (!g_utf8_validate (str, str_len, &end) && (end - str) < 120) {
    gchar *converted_str;
    gsize tmp;
    const gchar *enc;

    enc = g_getenv ("GST_SUBTITLE_ENCODING");
    if (enc == NULL || *enc == '\0') {
      /* if local encoding is UTF-8 and no encoding specified
       * via the environment variable, assume ISO-8859-15 */
      if (g_get_charset (&enc)) {
        enc = "ISO-8859-15";
      }
    }
    converted_str = gst_convert_to_utf8 (str, str_len, enc, &tmp, NULL);
    if (converted_str != NULL) {
      g_free (str);
      str = converted_str;
    }
  }

  format = gst_sub_parse_data_format_autodetect (str);
  g_free (str);

  switch (format) {
    case GST_SUB_PARSE_FORMAT_MDVDSUB:
      GST_DEBUG ("MicroDVD format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_SUBRIP:
      GST_DEBUG ("SubRip format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_MPSUB:
      GST_DEBUG ("MPSub format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_SAMI:
      GST_DEBUG ("SAMI (time-based) format detected");
      caps = SAMI_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_TMPLAYER:
      GST_DEBUG ("TMPlayer (time based) format detected");
      caps = TMP_CAPS;
      break;
      /* FIXME: our MPL2 typefinding is not really good enough to warrant
       * returning a high probability (however, since we registered our
       * typefinder here with a rank of MARGINAL we should pretty much only
       * be called if most other typefinders have already run */
    case GST_SUB_PARSE_FORMAT_MPL2:
      GST_DEBUG ("MPL2 (time based) format detected");
      caps = MPL2_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_SUBVIEWER:
      GST_DEBUG ("SubViewer format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_DKS:
      GST_DEBUG ("DKS format detected");
      caps = DKS_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_QTTEXT:
      GST_DEBUG ("QTtext format detected");
      caps = QTTEXT_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_LRC:
      GST_DEBUG ("LRC format detected");
      caps = LRC_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_VTT:
      GST_DEBUG ("WebVTT format detected");
      caps = VTT_CAPS;
      break;
    default:
    case GST_SUB_PARSE_FORMAT_UNKNOWN:
      GST_DEBUG ("no subtitle format detected");
      return;
  }

  /* if we're here, it's ok */
  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps);
}
/*
 * Plugin entry point: sets up the "subparse" debug category, then registers
 * the subtitle typefinder followed by the subparse and ssaparse elements.
 *
 * Stops at the first registration that fails and returns FALSE; returns TRUE
 * only when all three registrations succeeded.
 */
static gboolean
plugin_init (GstPlugin * plugin)
{
  gboolean registered;

  GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser");

  registered = gst_type_find_register (plugin, "subparse_typefind",
      GST_RANK_MARGINAL, gst_subparse_type_find,
      "srt,sub,mpsub,mdvd,smi,txt,dks,vtt", SUB_CAPS, NULL, NULL);
  if (!registered)
    return FALSE;

  registered = gst_element_register (plugin, "subparse",
      GST_RANK_PRIMARY, GST_TYPE_SUBPARSE);
  if (!registered)
    return FALSE;

  registered = gst_element_register (plugin, "ssaparse",
      GST_RANK_PRIMARY, GST_TYPE_SSA_PARSE);
  if (!registered)
    return FALSE;

  return TRUE;
}
/* Plugin descriptor: declares the "subparse" plugin ("Subtitle parsing",
 * LGPL) and names plugin_init() as its initialisation function. */
GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
GST_VERSION_MINOR,
subparse,
"Subtitle parsing",
plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)

View file

@ -24,30 +24,13 @@
#include <gst/gst.h>
#include <gst/base/gstadapter.h>
GST_DEBUG_CATEGORY_EXTERN (sub_parse_debug);
#define GST_CAT_DEFAULT sub_parse_debug
#include "gstsubparseelements.h"
G_BEGIN_DECLS
#define GST_TYPE_SUBPARSE (gst_sub_parse_get_type ())
G_DECLARE_FINAL_TYPE (GstSubParse, gst_sub_parse, GST, SUBPARSE, GstElement)
/* format enum
 *
 * Subtitle formats reported by gst_sub_parse_data_format_autodetect().
 * GST_SUB_PARSE_FORMAT_UNKNOWN means that no format was recognised. */
typedef enum
{
GST_SUB_PARSE_FORMAT_UNKNOWN = 0,
GST_SUB_PARSE_FORMAT_MDVDSUB = 1,
GST_SUB_PARSE_FORMAT_SUBRIP = 2,
GST_SUB_PARSE_FORMAT_MPSUB = 3,
GST_SUB_PARSE_FORMAT_SAMI = 4,
GST_SUB_PARSE_FORMAT_TMPLAYER = 5,
GST_SUB_PARSE_FORMAT_MPL2 = 6,
GST_SUB_PARSE_FORMAT_SUBVIEWER = 7,
GST_SUB_PARSE_FORMAT_DKS = 8,
GST_SUB_PARSE_FORMAT_QTTEXT = 9,
GST_SUB_PARSE_FORMAT_LRC = 10,
GST_SUB_PARSE_FORMAT_VTT = 11
} GstSubParseFormat;
typedef struct {
int state;

View file

@ -0,0 +1,426 @@
/* GStreamer
* Copyright (C) 2020 Huawei Technologies Co., Ltd.
* @Author: Stéphane Cerveau <scerveau@collabora.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include "gstsubparseelements.h"
GST_DEBUG_CATEGORY (sub_parse_debug);
/* regex type enum
 *
 * Identifies which format-detection regular expression
 * gst_sub_parse_data_format_autodetect_regex_once() should compile.
 * GST_SUB_PARSE_REGEX_UNKNOWN has no regex associated with it. */
typedef enum
{
GST_SUB_PARSE_REGEX_UNKNOWN = 0,
GST_SUB_PARSE_REGEX_MDVDSUB = 1,
GST_SUB_PARSE_REGEX_SUBRIP = 2,
GST_SUB_PARSE_REGEX_DKS = 3,
GST_SUB_PARSE_REGEX_VTT = 4,
} GstSubParseRegex;
/*
 * g_once() initializer that compiles one of the format-detection regexes.
 *
 * @data carries a GstSubParseRegex value packed into a pointer. The function
 * has the exact GThreadFunc signature, so calling it through g_once() no
 * longer goes through an incompatible function pointer type.
 *
 * Returns the compiled GRegex as a gpointer, or NULL if compilation failed or
 * the type is unknown (a warning is logged in both cases).
 */
static gpointer
gst_sub_parse_data_format_autodetect_regex_once (gpointer data)
{
  GstSubParseRegex regtype = (GstSubParseRegex) GPOINTER_TO_UINT (data);
  gpointer result = NULL;
  GError *gerr = NULL;

  switch (regtype) {
    case GST_SUB_PARSE_REGEX_MDVDSUB:
      result =
          (gpointer) g_regex_new ("^\\{[0-9]+\\}\\{[0-9]+\\}",
          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
      if (result == NULL) {
        g_warning ("Compilation of mdvd regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    case GST_SUB_PARSE_REGEX_SUBRIP:
      result = (gpointer)
          g_regex_new ("^[\\s\\n]*[\\n]? {0,3}[ 0-9]{1,4}\\s*(\x0d)?\x0a"
          " ?[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,3}"
          " +--> +[0-9]{1,2}: ?[0-9]{1,2}: ?[0-9]{1,2}[,.] {0,2}[0-9]{1,2}",
          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
      if (result == NULL) {
        g_warning ("Compilation of subrip regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    case GST_SUB_PARSE_REGEX_DKS:
      result = (gpointer) g_regex_new ("^\\[[0-9]+:[0-9]+:[0-9]+\\].*",
          G_REGEX_RAW | G_REGEX_OPTIMIZE, 0, &gerr);
      if (result == NULL) {
        g_warning ("Compilation of dks regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    case GST_SUB_PARSE_REGEX_VTT:
      result = (gpointer)
          g_regex_new ("^(\\xef\\xbb\\xbf)?WEBVTT[\\xa\\xd\\x20\\x9]", 0, 0,
          &gerr);
      if (result == NULL) {
        g_warning ("Compilation of vtt regex failed: %s", gerr->message);
        g_clear_error (&gerr);
      }
      break;
    default:
      GST_WARNING ("Trying to allocate regex of unknown type %u",
          (guint) regtype);
      break;
  }

  return result;
}
/*
 * Guess the subtitle format from the start of a file.
 *
 * @match_str must be a NUL-terminated string. The checks run in a fixed
 * order (regex based formats first, then marker strings, then sscanf()
 * patterns, finally the LRC line check) and the first match wins.
 *
 * Returns the detected GstSubParseFormat, or GST_SUB_PARSE_FORMAT_UNKNOWN.
 *
 * FIXME: maybe we should pass along a second argument, the preceding
 * text buffer, because that is how this originally worked, even though
 * I don't really see the use of that.
 */
GstSubParseFormat
gst_sub_parse_data_format_autodetect (gchar * match_str)
{
  guint n1, n2, n3;

  /* each regex is compiled once and then shared by every call */
  static GOnce mdvd_rx_once = G_ONCE_INIT;
  static GOnce subrip_rx_once = G_ONCE_INIT;
  static GOnce dks_rx_once = G_ONCE_INIT;
  static GOnce vtt_rx_once = G_ONCE_INIT;

  GRegex *mdvd_grx;
  GRegex *subrip_grx;
  GRegex *dks_grx;
  GRegex *vtt_grx;

  g_once (&mdvd_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_MDVDSUB);
  g_once (&subrip_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_SUBRIP);
  g_once (&dks_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_DKS);
  g_once (&vtt_rx_once,
      (GThreadFunc) gst_sub_parse_data_format_autodetect_regex_once,
      (gpointer) GST_SUB_PARSE_REGEX_VTT);

  mdvd_grx = (GRegex *) mdvd_rx_once.retval;
  subrip_grx = (GRegex *) subrip_rx_once.retval;
  dks_grx = (GRegex *) dks_rx_once.retval;
  vtt_grx = (GRegex *) vtt_rx_once.retval;

  if (g_regex_match (mdvd_grx, match_str, 0, NULL)) {
    GST_LOG ("MicroDVD (frame based) format detected");
    return GST_SUB_PARSE_FORMAT_MDVDSUB;
  }
  if (g_regex_match (subrip_grx, match_str, 0, NULL)) {
    GST_LOG ("SubRip (time based) format detected");
    return GST_SUB_PARSE_FORMAT_SUBRIP;
  }
  if (g_regex_match (dks_grx, match_str, 0, NULL)) {
    GST_LOG ("DKS (time based) format detected");
    return GST_SUB_PARSE_FORMAT_DKS;
  }
  if (g_regex_match (vtt_grx, match_str, 0, NULL) == TRUE) {
    GST_LOG ("WebVTT (time based) format detected");
    return GST_SUB_PARSE_FORMAT_VTT;
  }

  if (!strncmp (match_str, "FORMAT=TIME", 11)) {
    GST_LOG ("MPSub (time based) format detected");
    return GST_SUB_PARSE_FORMAT_MPSUB;
  }
  if (strstr (match_str, "<SAMI>") != NULL ||
      strstr (match_str, "<sami>") != NULL) {
    GST_LOG ("SAMI (time based) format detected");
    return GST_SUB_PARSE_FORMAT_SAMI;
  }
  /* we're boldly assuming the first subtitle appears within the first hour */
  if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 ||
      sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 ||
      sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 ||
      sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 ||
      sscanf (match_str, "00:%02u:%02u,%u=", &n1, &n2, &n3) == 3) {
    GST_LOG ("TMPlayer (time based) format detected");
    return GST_SUB_PARSE_FORMAT_TMPLAYER;
  }
  if (sscanf (match_str, "[%u][%u]", &n1, &n2) == 2) {
    GST_LOG ("MPL2 (time based) format detected");
    return GST_SUB_PARSE_FORMAT_MPL2;
  }
  if (strstr (match_str, "[INFORMATION]") != NULL) {
    GST_LOG ("SubViewer (time based) format detected");
    return GST_SUB_PARSE_FORMAT_SUBVIEWER;
  }
  if (strstr (match_str, "{QTtext}") != NULL) {
    GST_LOG ("QTtext (time based) format detected");
    return GST_SUB_PARSE_FORMAT_QTTEXT;
  }
  /* We assume the LRC file starts immediately */
  if (match_str[0] == '[') {
    gboolean all_lines_good = TRUE;
    gchar **split;
    gchar **ptr;

    ptr = split = g_strsplit (match_str, "\n", -1);
    /* the last piece is left out of the check */
    while (*ptr && *(ptr + 1)) {
      gchar *str = *ptr;
      gsize len = strlen (str);

      if (sscanf (str, "[%u:%02u.%02u]", &n1, &n2, &n3) == 3 ||
          sscanf (str, "[%u:%02u.%03u]", &n1, &n2, &n3) == 3) {
        all_lines_good = TRUE;
      } else if (len > 0 && str[len - 1] == ']' && strchr (str, ':') != NULL) {
        /* len > 0: an empty line must not make us read str[-1] */
        all_lines_good = TRUE;
      } else {
        all_lines_good = FALSE;
        break;
      }
      ptr++;
    }
    g_strfreev (split);

    if (all_lines_good)
      return GST_SUB_PARSE_FORMAT_LRC;
  }

  GST_DEBUG ("no subtitle format detected");
  return GST_SUB_PARSE_FORMAT_UNKNOWN;
}
/*
 * Convert @len bytes of @str from @encoding to UTF-8.
 *
 * "*" is handed to g_convert_with_fallback() as the fallback string.
 * *@consumed is reset to 0 before the conversion and then filled in by
 * g_convert_with_fallback(); @err receives any conversion error.
 *
 * Returns the converted string with a leading UTF-8 BOM removed, or NULL if
 * the conversion failed. Callers release the result with g_free().
 */
gchar *
gst_sub_parse_gst_convert_to_utf8 (const gchar * str, gsize len,
    const gchar * encoding, gsize * consumed, GError ** err)
{
  gchar *utf8;
  gsize utf8_len;

  *consumed = 0;

  /* The char cast is necessary in glib < 2.24 */
  utf8 = g_convert_with_fallback (str, len, "UTF-8", encoding, (char *) "*",
      consumed, NULL, err);
  if (utf8 == NULL)
    return NULL;

  /* Skip the UTF-8 BOM (EF BB BF) if it was added; the move also carries the
   * terminating NUL along. */
  utf8_len = strlen (utf8);
  if (utf8_len >= 3 && memcmp (utf8, "\xEF\xBB\xBF", 3) == 0)
    memmove (utf8, utf8 + 3, utf8_len - 3 + 1);

  return utf8;
}
/*
 * Guess the text encoding of @str (@len bytes) from a leading byte order mark.
 *
 * Returns a newly allocated encoding name ("UTF-8", "UTF-16BE", "UTF-16LE",
 * "UTF-32BE" or "UTF-32LE"), or NULL when no known BOM is present. The caller
 * frees the result with g_free().
 */
gchar *
gst_sub_parse_detect_encoding (const gchar * str, gsize len)
{
  const guint8 *bom = (const guint8 *) str;

  /* The 4-byte BOMs must be tested first: the UTF-32LE BOM (FF FE 00 00)
   * starts with the UTF-16LE BOM (FF FE), so testing UTF-16LE first would
   * make UTF-32LE undetectable. */
  if (len >= 4) {
    if (bom[0] == 0x00 && bom[1] == 0x00 && bom[2] == 0xFE && bom[3] == 0xFF)
      return g_strdup ("UTF-32BE");
    if (bom[0] == 0xFF && bom[1] == 0xFE && bom[2] == 0x00 && bom[3] == 0x00)
      return g_strdup ("UTF-32LE");
  }

  if (len >= 3 && bom[0] == 0xEF && bom[1] == 0xBB && bom[2] == 0xBF)
    return g_strdup ("UTF-8");

  if (len >= 2) {
    if (bom[0] == 0xFE && bom[1] == 0xFF)
      return g_strdup ("UTF-16BE");
    if (bom[0] == 0xFF && bom[1] == 0xFE)
      return g_strdup ("UTF-16LE");
  }

  return NULL;
}
/*
* Typefind support.
*/
/* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so;
* also, give different subtitle formats really different types */
/* One static caps object per subtitle flavour, each directly followed by the
 * macro that fetches a GstCaps from it via gst_static_caps_get(). */
static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle");
#define SUB_CAPS (gst_static_caps_get (&sub_caps))

static GstStaticCaps mpl2_caps =
GST_STATIC_CAPS ("application/x-subtitle-mpl2");
#define MPL2_CAPS (gst_static_caps_get (&mpl2_caps))

static GstStaticCaps tmp_caps =
GST_STATIC_CAPS ("application/x-subtitle-tmplayer");
#define TMP_CAPS (gst_static_caps_get (&tmp_caps))

static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami");
#define SAMI_CAPS (gst_static_caps_get (&smi_caps))

static GstStaticCaps dks_caps = GST_STATIC_CAPS ("application/x-subtitle-dks");
#define DKS_CAPS (gst_static_caps_get (&dks_caps))

static GstStaticCaps vtt_caps = GST_STATIC_CAPS ("application/x-subtitle-vtt");
#define VTT_CAPS (gst_static_caps_get (&vtt_caps))

static GstStaticCaps qttext_caps =
GST_STATIC_CAPS ("application/x-subtitle-qttext");
#define QTTEXT_CAPS (gst_static_caps_get (&qttext_caps))

static GstStaticCaps lrc_caps = GST_STATIC_CAPS ("application/x-subtitle-lrc");
#define LRC_CAPS (gst_static_caps_get (&lrc_caps))
/*
 * Typefind callback for subtitle files.
 *
 * Peeks at the start of the stream, converts it to UTF-8 (using a BOM-detected
 * encoding first, then the GST_SUBTITLE_ENCODING / locale fallback), runs
 * gst_sub_parse_data_format_autodetect() on the text and, when a format is
 * recognised, suggests the matching caps with GST_TYPE_FIND_MAXIMUM.
 *
 * @tf: the typefind context to peek from and suggest caps on
 * @private: unused
 */
static void
gst_sub_parse_type_find (GstTypeFind * tf, gpointer private)
{
  GstSubParseFormat format;
  const guint8 *data;
  GstCaps *caps;
  gchar *str;
  /* number of valid bytes in str, not counting the terminating NUL */
  gsize str_len = 128;
  gchar *encoding = NULL;
  const gchar *end;

  if (!(data = gst_type_find_peek (tf, 0, 129)))
    return;

  /* make sure string passed to _autodetect() is NUL-terminated */
  str = g_malloc0 (129);
  memcpy (str, data, 128);

  if ((encoding = gst_sub_parse_detect_encoding (str, str_len)) != NULL) {
    gchar *converted_str;
    GError *err = NULL;
    gsize tmp;

    converted_str =
        gst_sub_parse_gst_convert_to_utf8 (str, str_len, encoding, &tmp, &err);
    if (converted_str == NULL) {
      GST_DEBUG ("Encoding '%s' detected but conversion failed: %s", encoding,
          err->message);
      g_clear_error (&err);
    } else {
      g_free (str);
      str = converted_str;
      /* The converted buffer has its own length; continuing to treat it as
       * 128 bytes long would read past its end when it is shorter. */
      str_len = strlen (str);
    }
    g_free (encoding);
  }

  /* Check if at least the first 120 chars are valid UTF8,
   * otherwise convert as always */
  if (!g_utf8_validate (str, str_len, &end) && (end - str) < 120) {
    gchar *converted_str;
    gsize tmp;
    const gchar *enc;

    enc = g_getenv ("GST_SUBTITLE_ENCODING");
    if (enc == NULL || *enc == '\0') {
      /* if local encoding is UTF-8 and no encoding specified
       * via the environment variable, assume ISO-8859-15 */
      if (g_get_charset (&enc)) {
        enc = "ISO-8859-15";
      }
    }
    converted_str =
        gst_sub_parse_gst_convert_to_utf8 (str, str_len, enc, &tmp, NULL);
    if (converted_str != NULL) {
      g_free (str);
      str = converted_str;
    }
  }

  format = gst_sub_parse_data_format_autodetect (str);
  g_free (str);

  switch (format) {
    case GST_SUB_PARSE_FORMAT_MDVDSUB:
      GST_DEBUG ("MicroDVD format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_SUBRIP:
      GST_DEBUG ("SubRip format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_MPSUB:
      GST_DEBUG ("MPSub format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_SAMI:
      GST_DEBUG ("SAMI (time-based) format detected");
      caps = SAMI_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_TMPLAYER:
      GST_DEBUG ("TMPlayer (time based) format detected");
      caps = TMP_CAPS;
      break;
      /* FIXME: our MPL2 typefinding is not really good enough to warrant
       * returning a high probability (however, since we registered our
       * typefinder here with a rank of MARGINAL we should pretty much only
       * be called if most other typefinders have already run */
    case GST_SUB_PARSE_FORMAT_MPL2:
      GST_DEBUG ("MPL2 (time based) format detected");
      caps = MPL2_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_SUBVIEWER:
      GST_DEBUG ("SubViewer format detected");
      caps = SUB_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_DKS:
      GST_DEBUG ("DKS format detected");
      caps = DKS_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_QTTEXT:
      GST_DEBUG ("QTtext format detected");
      caps = QTTEXT_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_LRC:
      GST_DEBUG ("LRC format detected");
      caps = LRC_CAPS;
      break;
    case GST_SUB_PARSE_FORMAT_VTT:
      GST_DEBUG ("WebVTT format detected");
      caps = VTT_CAPS;
      break;
    default:
    case GST_SUB_PARSE_FORMAT_UNKNOWN:
      GST_DEBUG ("no subtitle format detected");
      return;
  }

  /* if we're here, it's ok */
  gst_type_find_suggest (tf, GST_TYPE_FIND_MAXIMUM, caps);
}
/* Define the registration entry point for the "subparse_typefind" typefinder:
 * rank GST_RANK_MARGINAL, callback gst_sub_parse_type_find, extension list
 * "srt,sub,mpsub,mdvd,smi,txt,dks,vtt" and SUB_CAPS as the possible caps.
 * It is invoked through GST_TYPE_FIND_REGISTER (subparse, plugin). */
GST_TYPE_FIND_REGISTER_DEFINE (subparse, "subparse_typefind", GST_RANK_MARGINAL,
gst_sub_parse_type_find, "srt,sub,mpsub,mdvd,smi,txt,dks,vtt", SUB_CAPS,
NULL, NULL)
/*
 * One-time setup shared by every feature of the subparse plugin.
 *
 * The first call initialises the "subparse" debug category and registers the
 * subtitle typefinder on @plugin; later calls do nothing.
 *
 * Returns the result of the typefinder registration on the first call, and
 * TRUE on every later call.
 */
gboolean
sub_parse_element_init (GstPlugin * plugin)
{
  static gsize res = FALSE;
  gboolean ret = TRUE;

  if (g_once_init_enter (&res)) {
    GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser");
    /* plain assignment: OR-ing into an initial TRUE would hide a failure */
    ret = GST_TYPE_FIND_REGISTER (subparse, plugin);
    g_once_init_leave (&res, TRUE);
  }
  return ret;
}

View file

@ -0,0 +1,58 @@
/* GStreamer
* Copyright (C) <2002> David A. Schleef <ds@schleef.org>
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
* Copyright (C) 2020 Huawei Technologies Co., Ltd.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __GST_SUBPARSE_ELEMENT_H__
#define __GST_SUBPARSE_ELEMENT_H__
#include <gst/gst.h>
/* format enum
 *
 * Subtitle formats reported by gst_sub_parse_data_format_autodetect().
 * GST_SUB_PARSE_FORMAT_UNKNOWN means that no format was recognised. */
typedef enum
{
GST_SUB_PARSE_FORMAT_UNKNOWN = 0,
GST_SUB_PARSE_FORMAT_MDVDSUB = 1,
GST_SUB_PARSE_FORMAT_SUBRIP = 2,
GST_SUB_PARSE_FORMAT_MPSUB = 3,
GST_SUB_PARSE_FORMAT_SAMI = 4,
GST_SUB_PARSE_FORMAT_TMPLAYER = 5,
GST_SUB_PARSE_FORMAT_MPL2 = 6,
GST_SUB_PARSE_FORMAT_SUBVIEWER = 7,
GST_SUB_PARSE_FORMAT_DKS = 8,
GST_SUB_PARSE_FORMAT_QTTEXT = 9,
GST_SUB_PARSE_FORMAT_LRC = 10,
GST_SUB_PARSE_FORMAT_VTT = 11
} GstSubParseFormat;
/* Guess the subtitle format of the NUL-terminated text in match_str. */
G_GNUC_INTERNAL GstSubParseFormat gst_sub_parse_data_format_autodetect (gchar * match_str);
/* Return a newly allocated encoding name derived from a leading BOM in the
 * first len bytes of str, or NULL if there is none. */
G_GNUC_INTERNAL gchar * gst_sub_parse_detect_encoding (const gchar * str, gsize len);
/* Convert len bytes of str from encoding to UTF-8 (leading BOM removed);
 * returns NULL on failure. */
G_GNUC_INTERNAL gchar * gst_sub_parse_gst_convert_to_utf8 (const gchar * str, gsize len, const gchar * encoding,
gsize * consumed, GError ** err);
/* One-time plugin-wide setup: debug category and typefinder registration. */
G_GNUC_INTERNAL gboolean sub_parse_element_init (GstPlugin * plugin);
/* Registration entry points for the plugin's individual features. */
GST_ELEMENT_REGISTER_DECLARE (subparse);
GST_ELEMENT_REGISTER_DECLARE (ssaparse);
GST_TYPE_FIND_REGISTER_DECLARE (subparse);
/* Debug category shared by the plugin; it becomes the default category of
 * every file that includes this header. */
GST_DEBUG_CATEGORY_EXTERN (sub_parse_debug);
#define GST_CAT_DEFAULT sub_parse_debug
#endif /* __GST_SUBPARSE_ELEMENT_H__ */

View file

@ -0,0 +1,45 @@
/* GStreamer
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
* Copyright (C) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
* Copyright (C) 2006 Tim-Philipp Müller <tim centricular net>
* Copyright (C) 2016 Philippe Normand <pnormand@igalia.com>
* Copyright (C) 2016 Jan Schmidt <jan@centricular.com>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
* Boston, MA 02110-1301, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "gstsubparseelements.h"
/*
 * Plugin entry point: registers the subparse and ssaparse elements.
 *
 * Both registrations are always attempted. The results are OR-ed together,
 * so the return value is FALSE only when neither element could be
 * registered.
 */
static gboolean
plugin_init (GstPlugin * plugin)
{
  gboolean subparse_registered = GST_ELEMENT_REGISTER (subparse, plugin);
  gboolean ssaparse_registered = GST_ELEMENT_REGISTER (ssaparse, plugin);

  return FALSE | subparse_registered | ssaparse_registered;
}
/* Plugin descriptor: declares the "subparse" plugin ("Subtitle parsing",
 * LGPL) and names plugin_init() as its initialisation function. */
GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
GST_VERSION_MINOR,
subparse,
"Subtitle parsing",
plugin_init, VERSION, "LGPL", GST_PACKAGE_NAME, GST_PACKAGE_ORIGIN)

View file

@ -1,6 +1,8 @@
subparse_sources = [
'gstssaparse.c',
'gstsubparse.c',
'gstsubparseelement.c',
'gstsubparseplugin.c',
'samiparse.c',
'tmplayerparse.c',
'mpl2parse.c',