mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-04-26 06:46:12 +00:00
subparse: recognise more subrip timestamp variants
Be even less restrictive in what we accept for .srt timestamps when typefinding and parsing subrip subtitles and add a unit test for the 'new' format. Fixes #585197.
This commit is contained in:
parent
e01fab3ace
commit
40bea96ff6
2 changed files with 81 additions and 15 deletions
|
@ -730,11 +730,59 @@ subrip_fix_up_markup (gchar ** p_txt)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static gboolean
|
||||||
|
parse_subrip_time (const gchar * ts_string, GstClockTime * t)
|
||||||
|
{
|
||||||
|
gchar s[128] = { '\0', };
|
||||||
|
gchar *end, *p;
|
||||||
|
guint hour, min, sec, msec, len;
|
||||||
|
|
||||||
|
while (*ts_string == ' ')
|
||||||
|
++ts_string;
|
||||||
|
|
||||||
|
g_strlcpy (s, ts_string, sizeof (s));
|
||||||
|
if ((end = strstr (s, "-->")))
|
||||||
|
*end = '\0';
|
||||||
|
g_strchomp (s);
|
||||||
|
|
||||||
|
/* ms may be in these formats:
|
||||||
|
* hh:mm:ss,500 = 500ms
|
||||||
|
* hh:mm:ss, 5 = 5ms
|
||||||
|
* hh:mm:ss, 5 = 50ms
|
||||||
|
* hh:mm:ss, 50 = 50ms
|
||||||
|
* hh:mm:ss,5 = 500ms
|
||||||
|
* and sscanf() doesn't differentiate between ' 5' and '5' so munge
|
||||||
|
* the white spaces within the timestamp to '0' (I'm sure there's a
|
||||||
|
* way to make sscanf() do this for us, but how?)
|
||||||
|
*/
|
||||||
|
g_strdelimit (s, " ", '0');
|
||||||
|
|
||||||
|
/* make sure we have exactly three digits after he comma */
|
||||||
|
p = strchr (s, ',');
|
||||||
|
g_assert (p != NULL);
|
||||||
|
++p;
|
||||||
|
len = strlen (p);
|
||||||
|
if (len > 3) {
|
||||||
|
p[3] = '\0';
|
||||||
|
} else
|
||||||
|
while (len < 3) {
|
||||||
|
g_strlcat (&p[len], "0", 2);
|
||||||
|
++len;
|
||||||
|
}
|
||||||
|
|
||||||
|
GST_LOG ("parsing timestamp '%s'", s);
|
||||||
|
if (sscanf (s, "%u:%u:%u,%u", &hour, &min, &sec, &msec) != 4) {
|
||||||
|
GST_WARNING ("failed to parse subrip timestamp string '%s'", s);
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
*t = ((hour * 3600) + (min * 60) + sec) * GST_SECOND + msec * GST_MSECOND;
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
static gchar *
|
static gchar *
|
||||||
parse_subrip (ParserState * state, const gchar * line)
|
parse_subrip (ParserState * state, const gchar * line)
|
||||||
{
|
{
|
||||||
guint h1, m1, s1, ms1;
|
|
||||||
guint h2, m2, s2, ms2;
|
|
||||||
int subnum;
|
int subnum;
|
||||||
gchar *ret;
|
gchar *ret;
|
||||||
|
|
||||||
|
@ -745,21 +793,24 @@ parse_subrip (ParserState * state, const gchar * line)
|
||||||
state->state = 1;
|
state->state = 1;
|
||||||
return NULL;
|
return NULL;
|
||||||
case 1:
|
case 1:
|
||||||
|
{
|
||||||
|
GstClockTime ts_start, ts_end;
|
||||||
|
gchar *end_time;
|
||||||
|
|
||||||
/* looking for start_time --> end_time */
|
/* looking for start_time --> end_time */
|
||||||
if (sscanf (line, "%u:%u:%u,%u --> %u:%u:%u,%u",
|
if ((end_time = strstr (line, " --> ")) &&
|
||||||
&h1, &m1, &s1, &ms1, &h2, &m2, &s2, &ms2) == 8) {
|
parse_subrip_time (line, &ts_start) &&
|
||||||
|
parse_subrip_time (end_time + strlen (" --> "), &ts_end) &&
|
||||||
|
state->start_time <= ts_end) {
|
||||||
state->state = 2;
|
state->state = 2;
|
||||||
state->start_time =
|
state->start_time = ts_start;
|
||||||
(((guint64) h1) * 3600 + m1 * 60 + s1) * GST_SECOND +
|
state->duration = ts_end - ts_start;
|
||||||
ms1 * GST_MSECOND;
|
|
||||||
state->duration =
|
|
||||||
(((guint64) h2) * 3600 + m2 * 60 + s2) * GST_SECOND +
|
|
||||||
ms2 * GST_MSECOND - state->start_time;
|
|
||||||
} else {
|
} else {
|
||||||
GST_DEBUG ("error parsing subrip time line");
|
GST_DEBUG ("error parsing subrip time line '%s'", line);
|
||||||
state->state = 0;
|
state->state = 0;
|
||||||
}
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
|
}
|
||||||
case 2:
|
case 2:
|
||||||
{
|
{
|
||||||
/* No need to parse that text if it's out of segment */
|
/* No need to parse that text if it's out of segment */
|
||||||
|
@ -993,9 +1044,9 @@ gst_sub_parse_data_format_autodetect_regex_once (GstSubParseRegex regtype)
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case GST_SUB_PARSE_REGEX_SUBRIP:
|
case GST_SUB_PARSE_REGEX_SUBRIP:
|
||||||
result = (gpointer) g_regex_new ("^([ 0-9]){0,3}[0-9](\x0d)?\x0a"
|
result = (gpointer) g_regex_new ("^([ 0-9]){0,3}[0-9]\\s*(\x0d)?\x0a"
|
||||||
"[ 0-9][0-9]:[ 0-9][0-9]:[ 0-9][0-9],[ 0-9]{2}[0-9]"
|
"[ 0-9][0-9]:[ 0-9][0-9]:[ 0-9][0-9],[ 0-9]{0,2}[0-9]"
|
||||||
" --> ([ 0-9])?[0-9]:[ 0-9][0-9]:[ 0-9][0-9],[ 0-9]{2}[0-9]",
|
" +--> +([ 0-9])?[0-9]:[ 0-9][0-9]:[ 0-9][0-9],[ 0-9]{0,2}[0-9]",
|
||||||
0, 0, &gerr);
|
0, 0, &gerr);
|
||||||
if (result == NULL) {
|
if (result == NULL) {
|
||||||
g_warning ("Compilation of subrip regex failed: %s", gerr->message);
|
g_warning ("Compilation of subrip regex failed: %s", gerr->message);
|
||||||
|
@ -1083,7 +1134,7 @@ gst_sub_parse_format_autodetect (GstSubParse * self)
|
||||||
gchar *data;
|
gchar *data;
|
||||||
GstSubParseFormat format;
|
GstSubParseFormat format;
|
||||||
|
|
||||||
if (strlen (self->textbuf->str) < 35) {
|
if (strlen (self->textbuf->str) < 30) {
|
||||||
GST_DEBUG ("File too small to be a subtitles file");
|
GST_DEBUG ("File too small to be a subtitles file");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
|
@ -139,6 +139,18 @@ static SubParseInputChunk srt_input2[] = {
|
||||||
0, 3 * GST_SECOND + 50 * GST_MSECOND, "Just testing."}
|
0, 3 * GST_SECOND + 50 * GST_MSECOND, "Just testing."}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* starts with chunk number 0 and has less than three digits after the comma
|
||||||
|
* and a few extra spaces before the arrow or at the end of the line */
|
||||||
|
static SubParseInputChunk srt_input3[] = {
|
||||||
|
{
|
||||||
|
"0\n00:00:01,0 --> 00:00:02,0\nOne\n\n",
|
||||||
|
1000 * GST_MSECOND, 2000 * GST_MSECOND, "One"}, {
|
||||||
|
"1\n00:00:02,5 --> 00:00:03, 5 \nTwo\n\n",
|
||||||
|
2500 * GST_MSECOND, 3005 * GST_MSECOND, "Two"}, {
|
||||||
|
"2\n00:00:03, 9 --> 00:00:04,0 \nThree\n\n",
|
||||||
|
3090 * GST_MSECOND, 4000 * GST_MSECOND, "Three"}
|
||||||
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
setup_subparse (void)
|
setup_subparse (void)
|
||||||
{
|
{
|
||||||
|
@ -247,6 +259,9 @@ GST_START_TEST (test_srt)
|
||||||
|
|
||||||
/* try with UTF-8 BOM at the start */
|
/* try with UTF-8 BOM at the start */
|
||||||
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input2));
|
test_srt_do_test (srt_input1, 0, G_N_ELEMENTS (srt_input2));
|
||||||
|
|
||||||
|
/* try with fewer than three post-comma digits, and some extra spaces */
|
||||||
|
test_srt_do_test (srt_input3, 0, G_N_ELEMENTS (srt_input3));
|
||||||
}
|
}
|
||||||
|
|
||||||
GST_END_TEST;
|
GST_END_TEST;
|
||||||
|
|
Loading…
Reference in a new issue