diff --git a/ChangeLog b/ChangeLog index 21ee2c2251..43e3e53ad6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,20 @@ +2006-10-23 Tim-Philipp Müller + + * gst/subparse/Makefile.am: + * gst/subparse/gstsubparse.c: + (gst_sub_parse_data_format_autodetect), + (gst_sub_parse_format_autodetect), (handle_buffer), + (gst_sub_parse_chain), (gst_subparse_type_find), (plugin_init): + * gst/subparse/gstsubparse.h: + * gst/subparse/tmplayerparse.c: (tmplayer_parse_line), + (parse_tmplayer): + * gst/subparse/tmplayerparse.h: + Add support for TMPlayer-type subtitles (#362845). + + * tests/check/elements/subparse.c: (test_tmplayer_do_test), + (GST_START_TEST), (subparse_suite): + Add some basic unit tests for the above. + 2006-10-23 Tim-Philipp Müller * tests/check/elements/audiorate.c: (test_injector_base_init), diff --git a/gst/subparse/Makefile.am b/gst/subparse/Makefile.am index 6726ae0b0e..41b17d5c05 100644 --- a/gst/subparse/Makefile.am +++ b/gst/subparse/Makefile.am @@ -6,7 +6,9 @@ libgstsubparse_la_SOURCES = \ gstsubparse.c \ gstsubparse.h \ samiparse.c \ - samiparse.h + samiparse.h \ + tmplayerparse.c \ + tmplayerparse.h libgstsubparse_la_CFLAGS = $(GST_CFLAGS) libgstsubparse_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) @@ -15,4 +17,5 @@ libgstsubparse_la_LIBADD = $(GST_LIBS) noinst_HEADERS = \ gstssaparse.h \ gstsubparse.h \ - samiparse.h + samiparse.h \ + tmplayerparse.h diff --git a/gst/subparse/gstsubparse.c b/gst/subparse/gstsubparse.c index b8942a34c4..b6446f01a2 100644 --- a/gst/subparse/gstsubparse.c +++ b/gst/subparse/gstsubparse.c @@ -31,9 +31,9 @@ #include "gstsubparse.h" #include "gstssaparse.h" #include "samiparse.h" +#include "tmplayerparse.h" -GST_DEBUG_CATEGORY_STATIC (sub_parse_debug); -#define GST_CAT_DEFAULT sub_parse_debug +GST_DEBUG_CATEGORY (sub_parse_debug); #define DEFAULT_ENCODING NULL @@ -62,7 +62,8 @@ GST_ELEMENT_DETAILS ("Subtitle parser", static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", GST_PAD_SINK, GST_PAD_ALWAYS, - 
GST_STATIC_CAPS ("application/x-subtitle; application/x-subtitle-sami") + GST_STATIC_CAPS ("application/x-subtitle; application/x-subtitle-sami; " + "application/x-subtitle-tmplayer") ); #else static GstStaticPadTemplate sink_templ = GST_STATIC_PAD_TEMPLATE ("sink", @@ -754,6 +755,7 @@ gst_sub_parse_data_format_autodetect (gchar * match_str) static gboolean need_init_regexps = TRUE; static regex_t mdvd_rx; static regex_t subrip_rx; + guint n1, n2, n3; /* initialize the regexps used the first time around */ if (need_init_regexps) { @@ -789,6 +791,15 @@ gst_sub_parse_data_format_autodetect (gchar * match_str) GST_LOG ("SAMI (time based) format detected"); return GST_SUB_PARSE_FORMAT_SAMI; } + /* we're boldly assuming the first subtitle appears within the first hour */ + if (sscanf (match_str, "0:%02u:%02u:", &n1, &n2) == 2 || + sscanf (match_str, "0:%02u:%02u=", &n1, &n2) == 2 || + sscanf (match_str, "00:%02u:%02u:", &n1, &n2) == 2 || + sscanf (match_str, "00:%02u:%02u=", &n1, &n2) == 2 || + sscanf (match_str, "00:%02u:%02u,%u=", &n1, &n2, &n3) == 3) { + GST_LOG ("TMPlayer (time based) format detected"); + return GST_SUB_PARSE_FORMAT_TMPLAYER; + } GST_DEBUG ("no subtitle format detected"); return GST_SUB_PARSE_FORMAT_UNKNOWN; @@ -826,6 +837,9 @@ gst_sub_parse_format_autodetect (GstSubParse * self) self->parse_line = parse_sami; sami_context_init (&self->state); return gst_caps_new_simple ("text/x-pango-markup", NULL); + case GST_SUB_PARSE_FORMAT_TMPLAYER: + self->parse_line = parse_tmplayer; + return gst_caps_new_simple ("text/plain", NULL); case GST_SUB_PARSE_FORMAT_UNKNOWN: default: GST_DEBUG ("no subtitle format detected"); @@ -878,7 +892,7 @@ handle_buffer (GstSubParse * self, GstBuffer * buf) /* Set segment on our parser state machine */ self->state.segment = self->segment; /* Now parse the line, out of segment lines will just return NULL */ - GST_DEBUG ("Parsing line '%s'", line); + GST_LOG_OBJECT (self, "Parsing line '%s'", line); subtitle = self->parse_line 
(&self->state, line); g_free (line); @@ -900,7 +914,7 @@ handle_buffer (GstSubParse * self, GstBuffer * buf) gst_segment_set_last_stop (self->segment, GST_FORMAT_TIME, self->state.start_time); - GST_DEBUG ("Sending text '%s', %" GST_TIME_FORMAT " + %" + GST_DEBUG_OBJECT (self, "Sending text '%s', %" GST_TIME_FORMAT " + %" GST_TIME_FORMAT, subtitle, GST_TIME_ARGS (self->state.start_time), GST_TIME_ARGS (self->state.duration)); @@ -924,8 +938,7 @@ gst_sub_parse_chain (GstPad * sinkpad, GstBuffer * buf) GstFlowReturn ret; GstSubParse *self; - GST_DEBUG ("gst_sub_parse_chain"); - self = GST_SUBPARSE (gst_pad_get_parent (sinkpad)); + self = GST_SUBPARSE (GST_PAD_PARENT (sinkpad)); /* Push newsegment if needed */ if (self->need_segment) { @@ -938,8 +951,6 @@ gst_sub_parse_chain (GstPad * sinkpad, GstBuffer * buf) ret = handle_buffer (self, buf); - gst_object_unref (self); - return ret; } @@ -1052,11 +1063,16 @@ gst_sub_parse_change_state (GstElement * element, GstStateChange transition) * Typefind support. 
*/ +/* FIXME 0.11: these caps are ugly, use app/x-subtitle + type field or so; + * also, give different subtitle formats really different types */ +static GstStaticCaps tmp_caps = +GST_STATIC_CAPS ("application/x-subtitle-tmplayer"); static GstStaticCaps smi_caps = GST_STATIC_CAPS ("application/x-subtitle-sami"); static GstStaticCaps sub_caps = GST_STATIC_CAPS ("application/x-subtitle"); #define SUB_CAPS (gst_static_caps_get (&sub_caps)) #define SAMI_CAPS (gst_static_caps_get (&smi_caps)) +#define TMP_CAPS (gst_static_caps_get (&tmp_caps)) static void gst_subparse_type_find (GstTypeFind * tf, gpointer private) @@ -1091,6 +1107,10 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private) GST_DEBUG ("SAMI (time-based) format detected"); caps = SAMI_CAPS; break; + case GST_SUB_PARSE_FORMAT_TMPLAYER: + GST_DEBUG ("TMPlayer (time based) format detected"); + caps = TMP_CAPS; + break; default: case GST_SUB_PARSE_FORMAT_UNKNOWN: GST_DEBUG ("no subtitle format detected"); @@ -1104,7 +1124,9 @@ gst_subparse_type_find (GstTypeFind * tf, gpointer private) static gboolean plugin_init (GstPlugin * plugin) { - static gchar *sub_exts[] = { "srt", "sub", "mpsub", "mdvd", "smi", NULL }; + static gchar *sub_exts[] = { "srt", "sub", "mpsub", "mdvd", "smi", "txt", + NULL + }; GST_DEBUG_CATEGORY_INIT (sub_parse_debug, "subparse", 0, ".sub parser"); diff --git a/gst/subparse/gstsubparse.h b/gst/subparse/gstsubparse.h index 30e09786b9..8999d42001 100644 --- a/gst/subparse/gstsubparse.h +++ b/gst/subparse/gstsubparse.h @@ -23,6 +23,9 @@ #include +GST_DEBUG_CATEGORY_EXTERN (sub_parse_debug); +#define GST_CAT_DEFAULT sub_parse_debug + G_BEGIN_DECLS #define GST_TYPE_SUBPARSE \ @@ -46,7 +49,8 @@ typedef enum GST_SUB_PARSE_FORMAT_MDVDSUB = 1, GST_SUB_PARSE_FORMAT_SUBRIP = 2, GST_SUB_PARSE_FORMAT_MPSUB = 3, - GST_SUB_PARSE_FORMAT_SAMI = 4 + GST_SUB_PARSE_FORMAT_SAMI = 4, + GST_SUB_PARSE_FORMAT_TMPLAYER = 5 } GstSubParseFormat; typedef struct { diff --git a/gst/subparse/tmplayerparse.c 
b/gst/subparse/tmplayerparse.c new file mode 100644 index 0000000000..fb0460219a --- /dev/null +++ b/gst/subparse/tmplayerparse.c @@ -0,0 +1,118 @@ +/* GStreamer tmplayer format subtitle parser + * Copyright (C) 2006 Tim-Philipp Müller + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +#include "tmplayerparse.h" + +#include <string.h> + +/* From http://forum.doom9.org/archive/index.php/t-81059.html: + * + * TMPlayer format, which comes in five varieties: + * + * time-base 00:00:00: + * 00:00:50:This is the Earth at a time|when the dinosaurs roamed... + * 00:00:53: + * 00:00:54:a lush and fertile planet. + * 00:00:56: + * + * time-base 0:00:00: + * 0:00:50:This is the Earth at a time|when the dinosaurs roamed... + * 0:00:53: + * 0:00:54:a lush and fertile planet. + * 0:00:56: + * + * time-base 00:00:00= + * 00:00:50=This is the Earth at a time|when the dinosaurs roamed... + * 00:00:53= + * 00:00:54=a lush and fertile planet. + * 00:00:56= + * + * time-base 0:00:00= + * 0:00:50=This is the Earth at a time|when the dinosaurs roamed... + * 0:00:53= + * 0:00:54=a lush and fertile planet. + * 0:00:56= + * + * and multiline time-base 00:00:00,1= + * 00:00:50,1=This is the Earth at a time + * 00:00:50,2=when the dinosaurs roamed...
+ * 00:00:53,1=
+ * 00:00:54,1=a lush and fertile planet.
+ * 00:00:56,1=
+ */
+
+/* Parses one TMPlayer line. Text lines are collected in state->buf; once a
+ * later line ends the pending unit, its text is returned as a newly
+ * allocated string (with '|' turned into newlines) and state->duration is
+ * set. Returns NULL otherwise. line_num 0 (first line) never ends a unit. */
+static gchar *
+tmplayer_parse_line (ParserState * state, const gchar * line, guint line_num)
+{
+  const gchar *text_start;
+  GstClockTime ts;
+  gchar *ret = NULL;
+  gchar divc = '\0';
+  guint h, m, s, l = 1;
+  gint used = 0;                /* chars scanned up to and including divc */
+
+  /* %n reports where the divider is, whatever the width of h, m and s */
+  if (sscanf (line, "%u:%02u:%02u,%u%c%n", &h, &m, &s, &l, &divc,
+          &used) == 5 && divc == '=') {
+    GST_LOG ("multiline format %u %u %u %u", h, m, s, l);
+  } else if (sscanf (line, "%u:%02u:%02u%c%n", &h, &m, &s, &divc,
+          &used) == 4 && (divc == '=' || divc == ':')) {
+    GST_LOG ("single line format %u %u %u %u %c", h, m, s, l, divc);
+  } else {
+    GST_WARNING ("failed to parse line: '%s'", line);
+    return NULL;
+  }
+
+  text_start = line + used - 1; /* the divider; text begins right after */
+  /* widen first so that large hour values do not wrap in guint arithmetic */
+  ts = GST_SECOND * (((((guint64) h * 60) + m) * 60) + s);
+  /* an empty text, or a new first line while text is pending, ends the unit.
+   * NOTE(review): in the second case the new line's own text is not stored. */
+  if (text_start[1] == '\0' || (l == 1 && state->buf->len > 0)) {
+    if (GST_CLOCK_TIME_IS_VALID (state->start_time) &&
+        state->start_time < ts && line_num > 0) {
+      ret = g_strndup (state->buf->str, state->buf->len);
+      g_strdelimit (ret, "|", '\n');
+      g_string_truncate (state->buf, 0);
+      state->duration = ts - state->start_time;
+    } else if (line_num > 0) {
+      GST_WARNING ("end of subtitle unit but no valid start time?!");
+    }
+  } else {
+    if (l > 1)
+      g_string_append_c (state->buf, '\n');
+    g_string_append (state->buf, text_start + 1);
+    state->start_time = ts;
+  }
+
+  return ret;
+}
+
+/* Parses the next line; state->state is used as the running line count. */
+gchar *
+parse_tmplayer (ParserState * state, const gchar * line)
+{
+  gchar *ret = tmplayer_parse_line (state, line, state->state);
+  ++state->state;
+  return ret;
+}
diff --git a/gst/subparse/tmplayerparse.h b/gst/subparse/tmplayerparse.h
new file mode 100644
index 0000000000..a0001afcab
--- /dev/null
+++ b/gst/subparse/tmplayerparse.h
@@ -0,0 +1,32 @@
+/* GStreamer tmplayer
format subtitle parser
+ * Copyright (C) 2006 Tim-Philipp Müller
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef _TMPLAYER_PARSE_H_
+#define _TMPLAYER_PARSE_H_
+
+#include "gstsubparse.h"
+
+G_BEGIN_DECLS
+/* Parses one line of TMPlayer text; returns finished subtitle text or NULL */
+gchar * parse_tmplayer (ParserState * state, const gchar * line);
+
+G_END_DECLS
+
+#endif /* _TMPLAYER_PARSE_H_ */
+
diff --git a/tests/check/elements/subparse.c b/tests/check/elements/subparse.c
index 80d32dface..a81428d150 100644
--- a/tests/check/elements/subparse.c
+++ b/tests/check/elements/subparse.c
@@ -202,6 +202,199 @@ GST_START_TEST (test_srt)
 
 GST_END_TEST;
 
+static void
+test_tmplayer_do_test (SubParseInputChunk * input, guint num)
+{                               /* shared driver: runs one test over num chunks */
+  guint n;
+
+  setup_subparse ();
+  /* push the input text (.in) of every chunk as one buffer */
+  for (n = 0; n < num; ++n) {
+    GstBuffer *buf;
+
+    buf = buffer_from_static_string (input[n].in);
+    fail_unless_equals_int (gst_pad_push (mysrcpad, buf), GST_FLOW_OK);
+  }
+
+  gst_pad_push_event (mysrcpad, gst_event_new_eos ());
+
+  fail_unless_equals_int (g_list_length (buffers), num);
+  /* buffer n must match chunk n: from_ts, to_ts - from_ts and out */
+  for (n = 0; n < num; ++n) {
+    const GstStructure *buffer_caps_struct;
+    GstBuffer *buf;
+    gchar *out;
+    guint out_size;
+
+    buf = g_list_nth_data (buffers, n);
+    fail_unless (buf != NULL);
+    fail_unless (GST_BUFFER_TIMESTAMP_IS_VALID (buf), NULL);
+    fail_unless (GST_BUFFER_DURATION_IS_VALID (buf), NULL);
+    fail_unless_equals_uint64 (GST_BUFFER_TIMESTAMP (buf), input[n].from_ts);
+    fail_unless_equals_uint64 (GST_BUFFER_DURATION (buf),
+        input[n].to_ts - input[n].from_ts);
+    out = (gchar *) GST_BUFFER_DATA (buf);
+    out_size = GST_BUFFER_SIZE (buf);
+    /* shouldn't have trailing newline characters */
+    fail_if (out_size > 0 && out[out_size - 1] == '\n');
+    /* shouldn't include NUL-terminator in data size */
+    fail_if (out_size > 0 && out[out_size - 1] == '\0');
+    /* but should still have a NUL-terminator behind the declared data */
+    fail_unless_equals_int (out[out_size], '\0');
+    /* make sure out string matches expected string */
+    fail_unless_equals_string (out, input[n].out);
+    /* check caps */
+    fail_unless (GST_BUFFER_CAPS (buf) != NULL);
+    buffer_caps_struct = gst_caps_get_structure (GST_BUFFER_CAPS (buf), 0);
+    fail_unless_equals_string (gst_structure_get_name (buffer_caps_struct),
+        "text/plain");
+  }
+
+  teardown_subparse ();
+}
+
+GST_START_TEST (test_tmplayer_multiline)
+{                               /* hh:mm:ss,N=text format, one input line per text line */
+  static SubParseInputChunk tmplayer_multiline_input[] = {
+    {
+          "00:00:10,1=This is the Earth at a time\n"
+          "00:00:10,2=when the dinosaurs roamed...\n" "00:00:13,1=\n",
+          10 * GST_SECOND, 13 * GST_SECOND,
+        "This is the Earth at a time\nwhen the dinosaurs roamed..."}, {
+          "00:00:14,1=a lush and fertile planet.\n" "00:00:16,1=\n",
+          14 * GST_SECOND, 16 * GST_SECOND,
+        "a lush and fertile planet."}
+  };
+
+  test_tmplayer_do_test (tmplayer_multiline_input,
+      G_N_ELEMENTS (tmplayer_multiline_input));
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_tmplayer_multiline_with_bogus_lines)
+{                               /* multiline format with a non-subtitle line inside a unit */
+  static SubParseInputChunk tmplayer_multiline_b_input[] = {
+    {
+          "00:00:10,1=This is the Earth at a time\n"
+          "Yooboo wabahablablahuguug bogus line hello test 1-2-3-4\n"
+          "00:00:10,2=when the dinosaurs roamed...\n" "00:00:13,1=\n",
+          10 * GST_SECOND, 13 * GST_SECOND,
+        "This is the Earth at a time\nwhen the dinosaurs roamed..."}, {
+          "00:00:14,1=a lush and fertile planet.\n" "00:00:16,1=\n",
+          14 * GST_SECOND, 16 * GST_SECOND,
+        "a lush and fertile planet."}
+  };
+
+  test_tmplayer_do_test (tmplayer_multiline_b_input,
+      G_N_ELEMENTS (tmplayer_multiline_b_input));
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_tmplayer_style1)
+{                               /* hh:mm:ss:text format, pipe character splits text lines */
+  static SubParseInputChunk tmplayer_style1_input[] = {
+    {
+          "00:00:10:This is the Earth at a time|when the dinosaurs roamed...\n"
+          "00:00:13:\n",
+          10 * GST_SECOND, 13 * GST_SECOND,
+        "This is the Earth at a time\nwhen the dinosaurs roamed..."}, {
+          "00:00:14:a lush and fertile planet.\n" "00:00:16:\n",
+          14 * GST_SECOND, 16 * GST_SECOND,
+        "a lush and fertile planet."}
+  };
+
+  test_tmplayer_do_test (tmplayer_style1_input,
+      G_N_ELEMENTS (tmplayer_style1_input));
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_tmplayer_style2)
+{                               /* hh:mm:ss=text format */
+  static SubParseInputChunk tmplayer_style2_input[] = {
+    {
+          "00:00:10=This is the Earth at a time|when the dinosaurs roamed...\n"
+          "00:00:13=\n",
+          10 * GST_SECOND, 13 * GST_SECOND,
+        "This is the Earth at a time\nwhen the dinosaurs roamed..."}, {
+          "00:00:14=a lush and fertile planet.\n" "00:00:16=\n",
+          14 * GST_SECOND, 16 * GST_SECOND,
+        "a lush and fertile planet."}
+  };
+
+  test_tmplayer_do_test (tmplayer_style2_input,
+      G_N_ELEMENTS (tmplayer_style2_input));
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_tmplayer_style3)
+{                               /* h:mm:ss:text format (single-digit hour field) */
+  static SubParseInputChunk tmplayer_style3_input[] = {
+    {
+          "0:00:10:This is the Earth at a time|when the dinosaurs roamed...\n"
+          "0:00:13:\n",
+          10 * GST_SECOND, 13 * GST_SECOND,
+        "This is the Earth at a time\nwhen the dinosaurs roamed..."}, {
+          "0:00:14:a lush and fertile planet.\n" "0:00:16:\n",
+          14 * GST_SECOND, 16 * GST_SECOND,
+        "a lush and fertile planet."}
+  };
+
+  test_tmplayer_do_test (tmplayer_style3_input,
+      G_N_ELEMENTS (tmplayer_style3_input));
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_tmplayer_style4)
+{                               /* h:mm:ss=text format (single-digit hour field) */
+  static SubParseInputChunk tmplayer_style4_input[] = {
+    {
+          "0:00:10=This is the Earth at a time|when the dinosaurs roamed...\n"
+          "0:00:13=\n",
+          10 * GST_SECOND, 13 * GST_SECOND,
+        "This is the Earth at a time\nwhen the dinosaurs roamed..."}, {
+          "0:00:14=a lush and fertile planet.\n" "0:00:16=\n",
+          14 * GST_SECOND, 16 * GST_SECOND,
+        "a lush and fertile planet."}
+  };
+
+  test_tmplayer_do_test (tmplayer_style4_input,
+      G_N_ELEMENTS (tmplayer_style4_input));
+}
+
+GST_END_TEST;
+
+GST_START_TEST (test_tmplayer_style4_with_bogus_lines)
+{                               /* h:mm:ss=text format with a comment line and a blank line mixed in */
+  static SubParseInputChunk tmplayer_style4b_input[] = {
+    {
+          "0:00:10=This is the Earth at a time|when the dinosaurs roamed...\n"
+          "# This is a bogus line with a comment and should just be skipped\n"
+          "0:00:13=\n",
+          10 * GST_SECOND, 13 * GST_SECOND,
+        "This is the Earth at a time\nwhen the dinosaurs roamed..."}, {
+          "0:00:14=a lush and fertile planet.\n"
+          " \n"
+          "0:00:16=\n",
+          14 * GST_SECOND, 16 * GST_SECOND,
+        "a lush and fertile planet."}
+  };
+
+  test_tmplayer_do_test (tmplayer_style4b_input,
+      G_N_ELEMENTS (tmplayer_style4b_input));
+}
+
+GST_END_TEST;
+
+/* TODO:
+ * - add/modify tests so that lines aren't fed to the parsers in complete
+ *   lines or sets of complete lines, but rather in random chunks
+ */
+
 static Suite *
 subparse_suite (void)
 {
@@ -211,7 +404,13 @@ subparse_suite (void)
 
   suite_add_tcase (s, tc_chain);
   tcase_add_test (tc_chain, test_srt);
-
+  tcase_add_test (tc_chain, test_tmplayer_multiline);
+  tcase_add_test (tc_chain, test_tmplayer_multiline_with_bogus_lines);
+  tcase_add_test (tc_chain, test_tmplayer_style1);
+  tcase_add_test (tc_chain, test_tmplayer_style2);
+  tcase_add_test (tc_chain, test_tmplayer_style3);
+  tcase_add_test (tc_chain, test_tmplayer_style4);
+  tcase_add_test (tc_chain, test_tmplayer_style4_with_bogus_lines);
   return s;
 }
 