gstreamer/gst/subparse/gstsubparse.c
Andy Wingo 21c3b52296 gst/audiorate/gstaudiorate.c (gst_audiorate_class_init): Pacify
Original commit message from CVS:
2005-05-05  Andy Wingo  <wingo@pobox.com>

* gst/audiorate/gstaudiorate.c (gst_audiorate_class_init): Pacify
GObject.
* configure.ac: Return audiorate and subparse from the ghetto.
Re-enable -Wall -Werror.
* gst/subparse/gstsubparse.c:
* gst/subparse/gstsubparse.h: Port to 0.9. Can operate loop-based
or chain-based. Cleaned up a bit. Not tested.
2005-05-06 03:32:51 +00:00

692 lines
18 KiB
C

/* GStreamer
* Copyright (C) <1999> Erik Walthinsen <omega@cse.ogi.edu>
* Copyright (c) 2004 Ronald S. Bultje <rbultje@ronald.bitfreak.net>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Library General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Library General Public License for more details.
*
* You should have received a copy of the GNU Library General Public
* License along with this library; if not, write to the
* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
* Boston, MA 02111-1307, USA.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <regex.h>
#include "gstsubparse.h"
/* Debug category used by every GST_DEBUG/GST_LOG/GST_WARNING call in this
 * file (selected through GST_CAT_DEFAULT below). */
GST_DEBUG_CATEGORY_STATIC (subparse_debug);
#define GST_CAT_DEFAULT subparse_debug
/* Always-present sink pad: accepts the raw subtitle data to be parsed. */
static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("application/x-subtitle")
);
/* Always-present source pad: produces either plain text or pango markup,
 * depending on which parser the format autodetection selects. */
static GstStaticPadTemplate src_templ = GST_STATIC_PAD_TEMPLATE ("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS ("text/plain; text/x-pango-markup")
);
/* GObject type boilerplate */
static void gst_subparse_base_init (GstSubparseClass * klass);
static void gst_subparse_class_init (GstSubparseClass * klass);
static void gst_subparse_init (GstSubparse * subparse);
/* source pad callbacks */
static const GstFormat *gst_subparse_formats (GstPad * pad);
static const GstEventMask *gst_subparse_src_eventmask (GstPad * pad);
static gboolean gst_subparse_src_event (GstPad * pad, GstEvent * event);
static GstElementStateReturn gst_subparse_change_state (GstElement * element);
/* loop-based (pull) operation is currently compiled out; only the chain
 * function below is installed on the sink pad */
#if 0
static void gst_subparse_loop (GstPad * sinkpad);
#endif
static GstFlowReturn gst_subparse_chain (GstPad * sinkpad, GstBuffer * buf);
/* typefinding is currently compiled out as well */
#if 0
static GstCaps *gst_subparse_type_find (GstBuffer * buf, gpointer private);
#endif
/* parent class, filled in by gst_subparse_class_init() and used to chain up
 * in gst_subparse_change_state() */
static GstElementClass *parent_class = NULL;
/*
 * Returns the GType of the subparse element, registering it with the
 * GObject type system (as a subtype of GstElement) on the first call.
 */
GType
gst_subparse_get_type (void)
{
  static GType subparse_type = 0;
  static const GTypeInfo subparse_info = {
    sizeof (GstSubparseClass),  /* class_size */
    (GBaseInitFunc) gst_subparse_base_init,     /* base_init */
    NULL,                       /* base_finalize */
    (GClassInitFunc) gst_subparse_class_init,   /* class_init */
    NULL,                       /* class_finalize */
    NULL,                       /* class_data */
    sizeof (GstSubparse),       /* instance_size */
    0,                          /* n_preallocs */
    (GInstanceInitFunc) gst_subparse_init,      /* instance_init */
  };

  /* already registered: hand back the cached id */
  if (subparse_type)
    return subparse_type;

  subparse_type = g_type_register_static (GST_TYPE_ELEMENT,
      "GstSubparse", &subparse_info, 0);

  return subparse_type;
}
/*
 * Base-init: installs the sink and source pad templates and the element
 * details on the element class.
 */
static void
gst_subparse_base_init (GstSubparseClass * klass)
{
  static GstElementDetails subparse_details = {
    "Subtitle parsers",
    "Codec/Parser/Subtitle",
    "Parses subtitle (.sub) files into text streams",
    "Gustavo J. A. M. Carneiro <gjc@inescporto.pt>\n"
        "Ronald S. Bultje <rbultje@ronald.bitfreak.net>"
  };
  /* templates are added in this order: sink first, then src */
  GstStaticPadTemplate *pad_templates[] = { &sink_templ, &src_templ };
  GstElementClass *eclass = GST_ELEMENT_CLASS (klass);
  guint i;

  for (i = 0; i < G_N_ELEMENTS (pad_templates); i++) {
    gst_element_class_add_pad_template (eclass,
        gst_static_pad_template_get (pad_templates[i]));
  }

  gst_element_class_set_details (eclass, &subparse_details);
}
/*
 * Class-init: remembers the parent class for chaining up and installs the
 * state-change handler.
 */
static void
gst_subparse_class_init (GstSubparseClass * klass)
{
  GstElementClass *eclass = GST_ELEMENT_CLASS (klass);

  parent_class = g_type_class_ref (GST_TYPE_ELEMENT);

  eclass->change_state = gst_subparse_change_state;
}
/*
 * Instance-init: creates both pads from their templates, hooks up the pad
 * callbacks, and sets up an empty input text buffer with no format chosen.
 */
static void
gst_subparse_init (GstSubparse * subparse)
{
  GstElement *element = GST_ELEMENT (subparse);

  /* sink pad: data arrives through the chain function */
  subparse->sinkpad =
      gst_pad_new_from_template (gst_static_pad_template_get (&sink_templ),
      "sink");
  gst_pad_set_chain_function (subparse->sinkpad, gst_subparse_chain);
  gst_element_add_pad (element, subparse->sinkpad);

  /* source pad: advertises formats and handles (seek) events */
  subparse->srcpad =
      gst_pad_new_from_template (gst_static_pad_template_get (&src_templ),
      "src");
  gst_pad_set_formats_function (subparse->srcpad, gst_subparse_formats);
  gst_pad_set_event_function (subparse->srcpad, gst_subparse_src_event);
  gst_pad_set_event_mask_function (subparse->srcpad,
      gst_subparse_src_eventmask);
  gst_element_add_pad (element, subparse->srcpad);

  /* parsing state: nothing buffered yet, format still to be detected */
  subparse->textbuf = g_string_new (NULL);
  subparse->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
}
/*
* Source pad functions.
*/
/* Returns the zero-terminated list of formats the source pad supports:
 * time only. */
static const GstFormat *
gst_subparse_formats (GstPad * pad)
{
  static const GstFormat supported_formats[] = {
    GST_FORMAT_TIME,
    0                           /* terminator */
  };

  return supported_formats;
}
/* Returns the zero-terminated list of events the source pad handles:
 * seeks using the SET method only. */
static const GstEventMask *
gst_subparse_src_eventmask (GstPad * pad)
{
  static const GstEventMask supported_events[] = {
    {GST_EVENT_SEEK, GST_SEEK_METHOD_SET},
    {0, 0}                      /* terminator */
  };

  return supported_events;
}
/*
 * Event handler for the source pad.
 *
 * Only SET-method seeks in TIME format are accepted; anything else trips a
 * g_return_val_if_fail() check and returns FALSE. An accepted seek is not
 * translated to a position: the element simply resets next_offset to 0 under
 * the sink pad's stream lock and returns TRUE.
 *
 * NOTE(review): when one of the checks fails the event is not unreffed here;
 * confirm who owns it in that case.
 * NOTE(review): depending on the core version gst_pad_get_parent() may
 * return a new reference that is never dropped here -- TODO confirm.
 * NOTE(review): in the code visible in this file next_offset is only read by
 * the compiled-out loop function, so in chain mode this reset appears to
 * have no effect -- confirm.
 */
static gboolean
gst_subparse_src_event (GstPad * pad, GstEvent * event)
{
GstSubparse *self = GST_SUBPARSE (gst_pad_get_parent (pad));
/* local shorthand; note that it is never #undef'd and therefore stays
 * defined for the remainder of the file */
#define grvif(x,y) g_return_val_if_fail (x, y)
/* we guaranteed these with the eventmask */
grvif (GST_EVENT_TYPE (event) == GST_EVENT_SEEK, FALSE);
grvif (GST_EVENT_SEEK_FORMAT (event) == GST_FORMAT_TIME, FALSE);
grvif (GST_EVENT_SEEK_METHOD (event) == GST_SEEK_METHOD_SET, FALSE);
/* nothing else is read from the event, so it can be released now */
gst_event_unref (event);
GST_STREAM_LOCK (self->sinkpad);
/* just seek to 0, rely on the overlayer to throw away buffers until the right
time -- and his mother cried... */
self->next_offset = 0;
GST_STREAM_UNLOCK (self->sinkpad);
return TRUE;
}
/*
 * Converts @len bytes at @str from the current locale's encoding to UTF-8.
 *
 * Bytes that cannot be converted are replaced by a single '?' each. When
 * g_locale_to_utf8() stops at an invalid sequence it reports (through
 * bytes_read) how many leading bytes were valid; that valid prefix is
 * converted and kept, and only the offending byte is replaced. Previously
 * the whole prefix was discarded byte by byte, turning every character
 * before the bad one into '?' and re-running the conversion for each of
 * them.
 *
 * @self: the element (unused here)
 * @str:  input bytes, not necessarily NUL-terminated within @len
 * @len:  number of bytes of @str to convert
 *
 * Returns: a newly allocated, NUL-terminated string; free with g_free().
 */
static gchar *
convert_encoding (GstSubparse * self, const gchar * str, gsize len)
{
  GString *converted;
  gchar *result;

  converted = g_string_new (NULL);

  while (len) {
    gsize bytes_read = 0, bytes_written = 0;
    gchar *chunk;

#ifndef GST_DISABLE_GST_DEBUG
    gchar *dbg = g_strndup (str, len);

    GST_DEBUG ("Trying to convert '%s'", dbg);
    g_free (dbg);
#endif

    chunk = g_locale_to_utf8 (str, len, &bytes_read, &bytes_written, NULL);
    if (chunk == NULL && bytes_read > 0 && bytes_read < len) {
      /* failure part-way through: retry with just the valid prefix so the
       * text in front of the bad byte is preserved */
      gsize valid_len = bytes_read;

      bytes_read = bytes_written = 0;
      chunk = g_locale_to_utf8 (str, valid_len, &bytes_read, &bytes_written,
          NULL);
    }

    if (chunk) {
      g_string_append_len (converted, chunk, bytes_written);
      g_free (chunk);

      len -= bytes_read;
      str += bytes_read;
    }
    if (len) {
      /* conversion error occurred => skip one char; this also guarantees
       * progress on every iteration */
      len--;
      str++;
      g_string_append_c (converted, '?');
    }
  }

  /* keep the character data, release only the GString wrapper */
  result = g_string_free (converted, FALSE);

  GST_DEBUG ("Converted to '%s'", result);
  return result;
}
/*
 * Removes the first complete line from the element's text buffer and returns
 * it converted to UTF-8, without its line terminator ("\n" or "\r\n").
 *
 * Returns: a newly allocated string, or NULL when the buffer does not yet
 * contain a '\n' (more data is needed).
 */
static gchar *
get_next_line (GstSubparse * self)
{
  const char *start = self->textbuf->str;
  const char *newline = strchr (start, '\n');
  int text_len;
  int consumed;
  char *line;

  if (newline == NULL) {
    /* end-of-line not found; return for more data */
    return NULL;
  }

  /* everything up to and including the '\n' leaves the buffer */
  text_len = newline - start;
  consumed = text_len + 1;

  /* a '\r' directly in front of the '\n' is not part of the line text */
  if (text_len > 0 && newline[-1] == '\r')
    text_len--;

  line = convert_encoding (self, start, text_len);
  self->textbuf = g_string_erase (self->textbuf, 0, consumed);

  return line;
}
/*
 * Parses one line of a MicroDVD (.sub) file:
 *
 *   {start_frame}{end_frame}[style tags]text|[style tags]text...
 *
 * '|' separates display lines; each display line may start with the style
 * tags {y:i} (italic), {y:b} (bold) and {s:N} (font size), in any order.
 * The result is pango markup with one <span> per display line, joined by
 * '\n'. state->start_time and state->duration are set from the frame
 * numbers using a fixed frame rate.
 *
 * Changes compared to the previous version:
 *  - the start time is no longer computed from (start_frame - 1000): both
 *    operands are unsigned, so every subtitle starting before frame 1000
 *    wrapped around to a huge timestamp;
 *  - an end frame smaller than the start frame yields a duration of 0
 *    instead of wrapping around;
 *  - style tags are accepted in any order, and an unterminated {s:N tag no
 *    longer leads to pointer arithmetic on a NULL strchr() result.
 *
 * Returns: a newly allocated markup string owned by the caller, or NULL if
 * the line does not start with "{N}{N}".
 */
static gchar *
parse_mdvdsub (ParserState * state, const gchar * line)
{
  /* FIXME: hardcoded for now, but detecting the correct value is
   * not going to be easy, I suspect... */
  const double frames_per_sec = 24000 / 1001.;
  const gchar *line_split;
  guint start_frame, end_frame;
  GString *markup;
  gchar *ret;

  if (sscanf (line, "{%u}{%u}", &start_frame, &end_frame) != 2) {
    g_warning ("Parse of the following line, assumed to be in microdvd .sub"
        " format, failed:\n%s", line);
    return NULL;
  }

  state->start_time = start_frame / frames_per_sec * GST_SECOND;
  if (end_frame > start_frame)
    state->duration = (end_frame - start_frame) / frames_per_sec * GST_SECOND;
  else
    state->duration = 0;

  /* skip the {%u}{%u} part; the successful sscanf() above guarantees that
   * both closing braces are present */
  line = strchr (line, '}') + 1;
  line = strchr (line, '}') + 1;

  markup = g_string_new (NULL);
  for (;;) {
    /* style variables, reset for every display line */
    gboolean italic = FALSE;
    gboolean bold = FALSE;
    guint fontsize = 0;
    gboolean matched;
    gchar *line_chunk;

    /* parse style markup */
    do {
      const gchar *tag_end;
      guint size;

      matched = FALSE;
      if (strncmp (line, "{y:i}", 5) == 0) {
        italic = TRUE;
        line += 5;
        matched = TRUE;
      } else if (strncmp (line, "{y:b}", 5) == 0) {
        bold = TRUE;
        line += 5;
        matched = TRUE;
      } else if (sscanf (line, "{s:%u}", &size) == 1 &&
          (tag_end = strchr (line, '}')) != NULL) {
        fontsize = size;
        line = tag_end + 1;
        matched = TRUE;
      }
    } while (matched);

    if ((line_split = strchr (line, '|')))
      line_chunk = g_markup_escape_text (line, line_split - line);
    else
      line_chunk = g_markup_escape_text (line, strlen (line));

    g_string_append (markup, "<span");
    if (italic)
      g_string_append (markup, " style=\"italic\"");
    if (bold)
      g_string_append (markup, " weight=\"bold\"");
    if (fontsize)
      g_string_append_printf (markup, " size=\"%u\"", fontsize * 1000);
    g_string_append_printf (markup, ">%s</span>", line_chunk);
    g_free (line_chunk);

    if (!line_split)
      break;

    /* another display line follows the '|' */
    g_string_append (markup, "\n");
    line = line_split + 1;
  }

  /* hand the character data to the caller, drop the GString wrapper */
  ret = g_string_free (markup, FALSE);

  GST_DEBUG ("parse_mdvdsub returning (%f+%f): %s",
      state->start_time / (double) GST_SECOND,
      state->duration / (double) GST_SECOND, ret);
  return ret;
}
/*
 * Line-by-line parser for SubRip (.srt) input, driven by state->state:
 *
 *   0: waiting for the subtitle number line
 *   1: waiting for the "hh:mm:ss,mmm --> hh:mm:ss,mmm" time line
 *   2: collecting text lines into state->buf until an empty line
 *
 * Changes compared to the previous version:
 *  - the debug call passed 0 as the format string; it now passes the
 *    message itself;
 *  - the subtitle number is scanned into an unsigned variable, matching
 *    the "%u" conversion;
 *  - an end time before the start time yields a duration of 0 instead of
 *    an unsigned wrap-around;
 *  - the default branch returns NULL so the function has a defined result
 *    even when assertions are compiled out.
 *
 * Returns: the markup-escaped text of a completed subtitle entry (newly
 * allocated, owned by the caller), or NULL while an entry is incomplete.
 */
static gchar *
parse_subrip (ParserState * state, const gchar * line)
{
  guint h1, m1, s1, ms1;
  guint h2, m2, s2, ms2;
  guint subnum;
  guint64 end_time;
  gchar *ret;

  switch (state->state) {
    case 0:
      /* looking for a single integer */
      if (sscanf (line, "%u", &subnum) == 1)
        state->state = 1;
      return NULL;
    case 1:
      /* looking for start_time --> end_time */
      if (sscanf (line, "%u:%u:%u,%u --> %u:%u:%u,%u",
              &h1, &m1, &s1, &ms1, &h2, &m2, &s2, &ms2) == 8) {
        state->state = 2;
        state->start_time =
            (((guint64) h1) * 3600 + m1 * 60 + s1) * GST_SECOND +
            ms1 * GST_MSECOND;
        end_time =
            (((guint64) h2) * 3600 + m2 * 60 + s2) * GST_SECOND +
            ms2 * GST_MSECOND;
        if (end_time > state->start_time)
          state->duration = end_time - state->start_time;
        else
          state->duration = 0;
      } else {
        GST_DEBUG ("error parsing subrip time line");
        state->state = 0;
      }
      return NULL;
    case 2:
      /* looking for subtitle text; empty line ends this
       * subtitle entry */
      if (state->buf->len)
        g_string_append_c (state->buf, '\n');
      g_string_append (state->buf, line);
      if (strlen (line) == 0) {
        ret = g_markup_escape_text (state->buf->str, state->buf->len);
        g_string_truncate (state->buf, 0);
        state->state = 0;
        return ret;
      }
      return NULL;
    default:
      g_assert_not_reached ();
      return NULL;
  }
}
/*
 * Line-by-line parser for MPSub input, driven by state->state:
 *
 *   0: waiting for a "<offset> <duration>" line (two floats, in seconds);
 *      the offset is relative to the end of the previous subtitle
 *   1: collecting text lines into state->buf until an empty line
 *
 * The default branch now returns NULL so that the function has a defined
 * result even when assertions are compiled out.
 *
 * Returns: the text of a completed subtitle entry (newly allocated, owned by
 * the caller), or NULL while an entry is incomplete.
 */
static gchar *
parse_mpsub (ParserState * state, const gchar * line)
{
  gchar *ret;
  float t1, t2;

  switch (state->state) {
    case 0:
      /* looking for two floats (offset, duration) */
      if (sscanf (line, "%f %f", &t1, &t2) == 2) {
        state->state = 1;
        /* new start = end of previous entry + offset */
        state->start_time += state->duration + GST_SECOND * t1;
        state->duration = GST_SECOND * t2;
      }
      return NULL;
    case 1:
      /* looking for subtitle text; empty line ends this
       * subtitle entry */
      if (state->buf->len)
        g_string_append_c (state->buf, '\n');
      g_string_append (state->buf, line);
      if (strlen (line) == 0) {
        ret = g_strdup (state->buf->str);
        g_string_truncate (state->buf, 0);
        state->state = 0;
        return ret;
      }
      return NULL;
    default:
      g_assert_not_reached ();
      return NULL;
  }
}
/*
 * Puts a parser state into its initial condition: an empty text
 * accumulator (created on first use, emptied otherwise) and zeroed
 * state/start_time/duration.
 */
static void
parser_state_init (ParserState * state)
{
  if (state->buf == NULL)
    state->buf = g_string_new (NULL);
  else
    g_string_truncate (state->buf, 0);

  state->state = 0;
  state->duration = 0;
  state->start_time = 0;
}
/* Frees the text accumulator of a parser state, if there is one, and clears
 * the pointer so the state can be initialised again later. */
static void
parser_state_dispose (ParserState * state)
{
  if (state->buf == NULL)
    return;

  g_string_free (state->buf, TRUE);
  state->buf = NULL;
}
/*
* FIXME: maybe we should pass along a second argument, the preceding
* text buffer, because that is how this originally worked, even though
* I don't really see the use of that.
*/
/*
 * Guesses the subtitle format from the beginning of the data.
 *
 * @match_str: NUL-terminated start of the subtitle data
 *
 * MicroDVD and SubRip are recognised with regular expressions that are
 * compiled once, on the first call; MPSub is recognised by its
 * "FORMAT=TIME" header.
 *
 * Changes compared to the previous version:
 *  - each regcomp() result is checked on its own; a misplaced parenthesis
 *    used to store the result of the "!= 0" comparison instead of the error
 *    code for the first expression;
 *  - the error text is fetched from the expression that actually failed;
 *  - an expression that failed to compile is never passed to regexec().
 *
 * Returns: the detected format, or GST_SUB_PARSE_FORMAT_UNKNOWN.
 */
static GstSubParseFormat
gst_subparse_data_format_autodetect (gchar * match_str)
{
  static gboolean need_init_regexps = TRUE;
  static gboolean have_mdvd_rx = FALSE;
  static gboolean have_subrip_rx = FALSE;
  static regex_t mdvd_rx;
  static regex_t subrip_rx;

  /* initialize the regexps used the first time around */
  if (need_init_regexps) {
    int err;
    char errstr[128];

    need_init_regexps = FALSE;

    err = regcomp (&mdvd_rx, "^\\{[0-9]+\\}\\{[0-9]+\\}",
        REG_EXTENDED | REG_NEWLINE | REG_NOSUB);
    if (err != 0) {
      regerror (err, &mdvd_rx, errstr, sizeof (errstr));
      GST_WARNING ("Compilation of microdvd regex failed: %s", errstr);
    } else {
      have_mdvd_rx = TRUE;
    }

    err = regcomp (&subrip_rx, "^1(\x0d)?\x0a"
        "[0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9]{3}"
        " --> [0-9][0-9]:[0-9][0-9]:[0-9][0-9],[0-9]{3}",
        REG_EXTENDED | REG_NEWLINE | REG_NOSUB);
    if (err != 0) {
      regerror (err, &subrip_rx, errstr, sizeof (errstr));
      GST_WARNING ("Compilation of subrip regex failed: %s", errstr);
    } else {
      have_subrip_rx = TRUE;
    }
  }

  if (have_mdvd_rx && regexec (&mdvd_rx, match_str, 0, NULL, 0) == 0) {
    GST_LOG ("subparse: MicroDVD (frame based) format detected");
    return GST_SUB_PARSE_FORMAT_MDVDSUB;
  }
  if (have_subrip_rx && regexec (&subrip_rx, match_str, 0, NULL, 0) == 0) {
    GST_LOG ("subparse: SubRip (time based) format detected");
    return GST_SUB_PARSE_FORMAT_SUBRIP;
  }
  if (!strncmp (match_str, "FORMAT=TIME", 11)) {
    GST_LOG ("subparse: MPSub (time based) format detected");
    return GST_SUB_PARSE_FORMAT_MPSUB;
  }

  GST_WARNING ("subparse: subtitle format autodetection failed!");
  return GST_SUB_PARSE_FORMAT_UNKNOWN;
}
/*
 * Detects the subtitle format from the first 35 bytes of the buffered text,
 * records it in the element, resets the parser state and selects the
 * matching line parser.
 *
 * Returns: new caps describing the output of the selected parser, or NULL
 * when fewer than 35 bytes are buffered or the format is not recognised (in
 * the latter case an element error is posted as well).
 */
static GstCaps *
gst_subparse_format_autodetect (GstSubparse * self)
{
  GstSubParseFormat detected;
  gchar *head;

  if (strlen (self->textbuf->str) < 35) {
    GST_DEBUG ("File too small to be a subtitles file");
    return NULL;
  }

  /* run the detection on a private copy of the first 35 bytes */
  head = g_strndup (self->textbuf->str, 35);
  detected = gst_subparse_data_format_autodetect (head);
  g_free (head);

  self->parser_type = detected;
  parser_state_init (&self->state);

  if (detected == GST_SUB_PARSE_FORMAT_MDVDSUB) {
    self->parse_line = parse_mdvdsub;
    return gst_caps_new_simple ("text/x-pango-markup", NULL);
  }
  if (detected == GST_SUB_PARSE_FORMAT_SUBRIP) {
    self->parse_line = parse_subrip;
    return gst_caps_new_simple ("text/plain", NULL);
  }
  if (detected == GST_SUB_PARSE_FORMAT_MPSUB) {
    self->parse_line = parse_mpsub;
    return gst_caps_new_simple ("text/plain", NULL);
  }

  /* GST_SUB_PARSE_FORMAT_UNKNOWN or anything unexpected */
  GST_DEBUG ("no subtitle format detected");
  GST_ELEMENT_ERROR (self, STREAM, WRONG_TYPE,
      ("The input is not a valid/supported subtitle file"), (NULL));
  return NULL;
}
/*
 * Appends the contents of @buf to the element's text buffer and releases
 * @buf (this function takes over the caller's reference).
 *
 * If the buffer does not start at the offset where the previous one ended,
 * the parser state and any buffered text are discarded first. Afterwards
 * offset and next_offset both point just past the data of @buf.
 *
 * The end offset is computed before the buffer is unreffed; the previous
 * version read the buffer's offset and size after dropping its reference.
 */
static void
feed_textbuf (GstSubparse * self, GstBuffer * buf)
{
  guint64 end_offset;

  if (GST_BUFFER_OFFSET (buf) != self->offset) {
    /* discontinuity: flush the parser state */
    parser_state_init (&self->state);
    g_string_truncate (self->textbuf, 0);
  }

  self->textbuf = g_string_append_len (self->textbuf,
      (gchar *) GST_BUFFER_DATA (buf), GST_BUFFER_SIZE (buf));

  /* must be read while we still hold our reference */
  end_offset = GST_BUFFER_OFFSET (buf) + GST_BUFFER_SIZE (buf);
  gst_buffer_unref (buf);

  self->offset = end_offset;
  self->next_offset = self->offset;
}
/*
 * Consumes one input buffer: appends it to the text buffer, detects the
 * subtitle format if that has not happened yet, then parses every complete
 * line and pushes one output buffer per finished subtitle on the source pad.
 *
 * Changes compared to the previous version:
 *  - the result of gst_pad_push() is no longer ignored: parsing stops at the
 *    first push that does not return GST_FLOW_OK and that result is
 *    returned (unparsed lines stay in the text buffer);
 *  - caps obtained from format detection are released if no output buffer
 *    was produced to attach them to, instead of being leaked.
 *
 * NOTE(review): when detection succeeds but this call produces no subtitle,
 * the detected caps are never attached to a later buffer, because the
 * element has no place to remember them between calls -- confirm whether
 * downstream needs them.
 *
 * Returns: GST_FLOW_UNEXPECTED if the format could not be detected, the
 * first non-OK result of gst_pad_push(), or GST_FLOW_OK.
 */
static GstFlowReturn
handle_buffer (GstSubparse * self, GstBuffer * buf)
{
  GstFlowReturn ret = GST_FLOW_OK;
  GstCaps *caps = NULL;
  gchar *line, *subtitle;

  /* takes ownership of buf */
  feed_textbuf (self, buf);

  /* make sure we know the format */
  if (G_UNLIKELY (self->parser_type == GST_SUB_PARSE_FORMAT_UNKNOWN)) {
    if (!(caps = gst_subparse_format_autodetect (self))) {
      return GST_FLOW_UNEXPECTED;
    }
  }

  while (ret == GST_FLOW_OK && (line = get_next_line (self))) {
    subtitle = self->parse_line (&self->state, line);
    g_free (line);

    if (subtitle) {
      GST_DEBUG ("subparse: loop: text %s, %lld+%lld\n",
          subtitle, self->state.start_time, self->state.duration);

      /* the output buffer takes over the subtitle string */
      buf = gst_buffer_new ();
      GST_BUFFER_DATA (buf) = (guint8 *) subtitle;
      GST_BUFFER_SIZE (buf) = strlen (subtitle);
      GST_BUFFER_TIMESTAMP (buf) = self->state.start_time;
      GST_BUFFER_DURATION (buf) = self->state.duration;

      GST_DEBUG ("sending text buffer %s at %lld", subtitle,
          self->state.start_time);

      if (G_UNLIKELY (caps)) {
        /* set caps on the first buffer */
        gst_buffer_set_caps (buf, caps);
        gst_caps_unref (caps);
        caps = NULL;
      }

      ret = gst_pad_push (self->srcpad, buf);
    }
  }

  /* nothing was pushed after detection: do not leak the caps */
  if (caps)
    gst_caps_unref (caps);

  return ret;
}
/*
 * Loop-based (pull mode) operation. Currently compiled out; only the chain
 * function is installed on the sink pad. When enabled, each iteration pulls
 * up to 1024 bytes starting at next_offset, hands them to handle_buffer()
 * and pauses the pad's task on any result other than GST_FLOW_OK.
 */
#if 0
static void
gst_subparse_loop (GstPad * sinkpad)
{
GstFlowReturn ret = GST_FLOW_OK;
GstSubparse *self;
GstBuffer *buf;
GST_DEBUG ("gst_subparse_loop");
self = GST_SUBPARSE (GST_OBJECT_PARENT (sinkpad));
GST_STREAM_LOCK (sinkpad);
ret = gst_pad_pull_range (sinkpad, self->next_offset, 1024, &buf);
if (ret == GST_FLOW_OK)
ret = handle_buffer (self, buf);
/* stop looping on error or end of data */
if (ret != GST_FLOW_OK)
gst_task_pause (GST_RPAD_TASK (sinkpad));
GST_STREAM_UNLOCK (sinkpad);
}
#endif
/*
 * Chain function of the sink pad: processes one incoming buffer under the
 * pad's stream lock and returns the result of handle_buffer().
 */
static GstFlowReturn
gst_subparse_chain (GstPad * sinkpad, GstBuffer * buf)
{
  GstSubparse *subparse;
  GstFlowReturn flow;

  GST_DEBUG ("gst_subparse_chain");

  subparse = GST_SUBPARSE (GST_OBJECT_PARENT (sinkpad));

  GST_STREAM_LOCK (sinkpad);
  flow = handle_buffer (subparse, buf);
  GST_STREAM_UNLOCK (sinkpad);

  return flow;
}
/*
 * State-change handler.
 *
 * PAUSED->READY: frees the parser state and forgets the detected format.
 * READY->PAUSED: resets the input offsets, empties the input text buffer
 *                and forgets the detected format, so that a new run starts
 *                from scratch.
 *
 * Emptying the text buffer is new: the offsets were reset to 0 but text
 * left over from a previous run (for example a last line without a
 * newline) stayed buffered, and since the first buffer of the new run
 * matches offset 0 it would have been appended to that stale text.
 *
 * All transitions are then chained up to the parent class, whose result is
 * returned.
 */
static GstElementStateReturn
gst_subparse_change_state (GstElement * element)
{
  GstSubparse *self = GST_SUBPARSE (element);

  switch (GST_STATE_TRANSITION (element)) {
    case GST_STATE_PAUSED_TO_READY:
      parser_state_dispose (&self->state);
      self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
      break;
    case GST_STATE_READY_TO_PAUSED:
      /* format detection will init the parser state */
      self->offset = self->next_offset = 0;
      g_string_truncate (self->textbuf, 0);
      self->parser_type = GST_SUB_PARSE_FORMAT_UNKNOWN;
      break;
    default:
      break;
  }

  return parent_class->change_state (element);
}
/*
 * Typefinding support. Currently compiled out.
 * NOTE(review): this code passes a GstBuffer to
 * gst_subparse_data_format_autodetect(), which takes a string in the live
 * code above, and it has no case for the MPSub format; it would need
 * updating before being re-enabled.
 */
#if 0
/* typefinding stuff */
static GstTypeDefinition subparse_definition = {
"subparse/x-text",
"text/plain",
".sub",
gst_subparse_type_find,
};
static GstCaps *
gst_subparse_type_find (GstBuffer * buf, gpointer private)
{
GstSubParseFormat format;
format = gst_subparse_data_format_autodetect (buf);
switch (format) {
case GST_SUB_PARSE_FORMAT_MDVDSUB:
GST_DEBUG (GST_CAT_PLUGIN_INFO, "MicroDVD format detected");
return gst_caps_new ("subparse_type_find", "text/plain", NULL);
case GST_SUB_PARSE_FORMAT_SUBRIP:
GST_DEBUG (GST_CAT_PLUGIN_INFO, "SubRip format detected");
return gst_caps_new ("subparse_type_find", "text/plain", NULL);
case GST_SUB_PARSE_FORMAT_UNKNOWN:
GST_DEBUG (GST_CAT_PLUGIN_INFO, "no subtitle format detected");
break;
}
/* don't know which this is */
return NULL;
}
#endif
/*
 * Plugin entry point: sets up the debug category and registers the
 * "subparse" element with primary rank.
 *
 * Returns: the result of gst_element_register().
 */
static gboolean
plugin_init (GstPlugin * plugin)
{
  gboolean registered;

  GST_DEBUG_CATEGORY_INIT (subparse_debug, "subparse", 0, ".sub parser");

  registered = gst_element_register (plugin, "subparse",
      GST_RANK_PRIMARY, GST_TYPE_SUBPARSE);

  return registered;
}
/* Plugin descriptor: name, description, entry point (plugin_init), version,
 * license, package and origin. */
GST_PLUGIN_DEFINE (GST_VERSION_MAJOR,
GST_VERSION_MINOR,
"subparse",
"Subtitle (.sub) file parsing",
plugin_init, VERSION, "LGPL", GST_PACKAGE, GST_ORIGIN)