ttmlparse: Convert tabs to spaces in input

The TTML spec has an issue in which tab (U+0009) characters that are
first in a sequence of whitespace characters are not suppressed at the
start and end of line areas. This issue was reported in [1] and the
editor of the TTML specs confirmed that this was not the intention
behind the spec.

The editor has created an issue to fix this in both the TTML1 and TTML2
specs [2], giving a proposal of what the spec should say. This patch
updates ttmlparse to implement the intended behaviour as proposed, in
which tabs in the input are converted to spaces before processing.

[1] https://github.com/w3c/imsc/issues/224
[2] https://github.com/w3c/ttml1/issues/235

https://bugzilla.gnome.org/show_bug.cgi?id=781539
This commit is contained in:
Chris Bass 2017-04-20 14:43:45 +01:00 committed by Sebastian Dröge
parent 6eb36406f0
commit 9f5e41218f

View file

@ -44,6 +44,12 @@
#define MAX_FONT_FAMILY_NAME_LENGTH 128 #define MAX_FONT_FAMILY_NAME_LENGTH 128
#define NSECONDS_IN_DAY 24 * 3600 * GST_SECOND #define NSECONDS_IN_DAY 24 * 3600 * GST_SECOND
#define TTML_CHAR_NULL 0x00
#define TTML_CHAR_SPACE 0x20
#define TTML_CHAR_TAB 0x09
#define TTML_CHAR_LF 0x0A
#define TTML_CHAR_CR 0x0D
GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug); GST_DEBUG_CATEGORY_EXTERN (ttmlparse_debug);
#define GST_CAT_DEFAULT ttmlparse_debug #define GST_CAT_DEFAULT ttmlparse_debug
@ -1247,12 +1253,14 @@ ttml_handle_element_whitespace (GNode * node, gpointer data)
gunichar u = g_utf8_get_char (c); gunichar u = g_utf8_get_char (c);
gint nbytes = g_unichar_to_utf8 (u, buf); gint nbytes = g_unichar_to_utf8 (u, buf);
if (nbytes == 1 && buf[0] == 0xA) { /* Replace each newline or tab with a space. */
if (nbytes == 1 && (buf[0] == TTML_CHAR_LF || buf[0] == TTML_CHAR_TAB)) {
*c = ' '; *c = ' ';
buf[0] = 0x20; buf[0] = TTML_CHAR_SPACE;
} }
if (nbytes == 1 && (buf[0] == 0x20 || buf[0] == 0x9 || buf[0] == 0xD)) { /* Collapse runs of whitespace. */
if (nbytes == 1 && (buf[0] == TTML_CHAR_SPACE || buf[0] == TTML_CHAR_CR)) {
++space_count; ++space_count;
} else { } else {
if (space_count > 1) { if (space_count > 1) {
@ -1261,7 +1269,7 @@ ttml_handle_element_whitespace (GNode * node, gpointer data)
c = new_head; c = new_head;
} }
space_count = 0; space_count = 0;
if (nbytes == 1 && buf[0] == 0x0) /* Reached end of string. */ if (nbytes == 1 && buf[0] == TTML_CHAR_NULL)
break; break;
} }
} }