mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-26 00:58:12 +00:00
gst/avi/gstavisubtitle.c: Detect other UTF byte order markers and convert to UTF-8 as appropriate.
Original commit message from CVS: * gst/avi/gstavisubtitle.c: (IS_BOM_UTF8), (IS_BOM_UTF16_BE), (IS_BOM_UTF16_LE), (IS_BOM_UTF32_BE), (IS_BOM_UTF32_LE), (gst_avi_subtitle_extract_file), (gst_avi_subtitle_parse_gab2_chunk): Detect other UTF byte order markers and convert to UTF-8 as appropriate.
This commit is contained in:
parent
87aed1a256
commit
49e1ff8931
2 changed files with 80 additions and 15 deletions
|
@ -1,3 +1,11 @@
|
||||||
|
2007-12-18 Tim-Philipp Müller <tim at centricular dot net>
|
||||||
|
|
||||||
|
* gst/avi/gstavisubtitle.c: (IS_BOM_UTF8), (IS_BOM_UTF16_BE),
|
||||||
|
(IS_BOM_UTF16_LE), (IS_BOM_UTF32_BE), (IS_BOM_UTF32_LE),
|
||||||
|
(gst_avi_subtitle_extract_file), (gst_avi_subtitle_parse_gab2_chunk):
|
||||||
|
Detect other UTF byte order markers and convert to UTF-8 as
|
||||||
|
appropriate.
|
||||||
|
|
||||||
2007-12-18 Tim-Philipp Müller <tim at centricular dot net>
|
2007-12-18 Tim-Philipp Müller <tim at centricular dot net>
|
||||||
|
|
||||||
* gst/avi/gstavisubtitle.c: (src_template),
|
* gst/avi/gstavisubtitle.c: (src_template),
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
* Boston, MA 02111-1307, USA.
|
* Boston, MA 02111-1307, USA.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* FIXME: BOM detection and format conversion; validate UTF-8; handle seeks */
|
/* FIXME: handle seeks, documentation */
|
||||||
|
|
||||||
/* example of a subtitle chunk in an avi file
|
/* example of a subtitle chunk in an avi file
|
||||||
* 00000000: 47 41 42 32 00 02 00 10 00 00 00 45 00 6e 00 67 GAB2.......E.n.g
|
* 00000000: 47 41 42 32 00 02 00 10 00 00 00 45 00 6e 00 67 GAB2.......E.n.g
|
||||||
|
@ -64,24 +64,72 @@ static GstStateChangeReturn gst_avi_subtitle_change_state (GstElement * element,
|
||||||
GST_BOILERPLATE (GstAviSubtitle, gst_avi_subtitle, GstElement,
|
GST_BOILERPLATE (GstAviSubtitle, gst_avi_subtitle, GstElement,
|
||||||
GST_TYPE_ELEMENT);
|
GST_TYPE_ELEMENT);
|
||||||
|
|
||||||
|
#define IS_BOM_UTF8(data) ((GST_READ_UINT32_BE(data) >> 8) == 0xEFBBBF)
|
||||||
|
#define IS_BOM_UTF16_BE(data) (GST_READ_UINT16_BE(data) == 0xFEFF)
|
||||||
|
#define IS_BOM_UTF16_LE(data) (GST_READ_UINT16_LE(data) == 0xFEFF)
|
||||||
|
#define IS_BOM_UTF32_BE(data) (GST_READ_UINT32_BE(data) == 0xFEFF)
|
||||||
|
#define IS_BOM_UTF32_LE(data) (GST_READ_UINT32_LE(data) == 0xFEFF)
|
||||||
|
|
||||||
static GstBuffer *
|
static GstBuffer *
|
||||||
gst_avi_subtitle_extract_utf8_file (GstBuffer * buffer, guint offset, guint len)
|
gst_avi_subtitle_extract_file (GstAviSubtitle * sub, GstBuffer * buffer,
|
||||||
|
guint offset, guint len)
|
||||||
{
|
{
|
||||||
guint8 *file = GST_BUFFER_DATA (buffer) + offset;
|
const gchar *input_enc = NULL;
|
||||||
|
GstBuffer *ret;
|
||||||
|
gchar *data;
|
||||||
|
|
||||||
if (file[0] == 0xEF && file[1] == 0xBB && file[2] == 0xBF) {
|
data = (gchar *) GST_BUFFER_DATA (buffer) + offset;
|
||||||
/* UTF-8 */
|
|
||||||
return gst_buffer_create_sub (buffer, offset + 3, len - 3);
|
if (len >= (3 + 1) && IS_BOM_UTF8 (data) &&
|
||||||
|
g_utf8_validate (data + 3, len - 3, NULL)) {
|
||||||
|
ret = gst_buffer_create_sub (buffer, offset + 3, len - 3);
|
||||||
|
} else if (len >= 2 && IS_BOM_UTF16_BE (data)) {
|
||||||
|
input_enc = "UTF-16BE";
|
||||||
|
data += 2;
|
||||||
|
len -= 2;
|
||||||
|
} else if (len >= 2 && IS_BOM_UTF16_LE (data)) {
|
||||||
|
input_enc = "UTF-16LE";
|
||||||
|
data += 2;
|
||||||
|
len -= 2;
|
||||||
|
} else if (len >= 4 && IS_BOM_UTF32_BE (data)) {
|
||||||
|
input_enc = "UTF-32BE";
|
||||||
|
data += 4;
|
||||||
|
len -= 4;
|
||||||
|
} else if (len >= 4 && IS_BOM_UTF32_LE (data)) {
|
||||||
|
input_enc = "UTF-32LE";
|
||||||
|
data += 4;
|
||||||
|
len -= 4;
|
||||||
|
} else if (g_utf8_validate (data, len, NULL)) {
|
||||||
|
/* not specified, check if it's UTF-8 */
|
||||||
|
ret = gst_buffer_create_sub (buffer, offset, len);
|
||||||
|
} else {
|
||||||
|
/* we could fall back to gst_tag_freeform_to_utf8() here */
|
||||||
|
GST_WARNING_OBJECT (sub, "unspecified encoding, and not UTF-8");
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
/* TODO Check for:
|
|
||||||
* 00 00 FE FF UTF-32, big-endian
|
|
||||||
* FF FE 00 00 UTF-32, little-endian
|
|
||||||
* FE FF UTF-16, big-endian
|
|
||||||
* FF FE UTF-16, little-endian
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* No BOM detected assuming UTF-8 */
|
if (input_enc) {
|
||||||
return gst_buffer_create_sub (buffer, offset, len);
|
GError *err = NULL;
|
||||||
|
gchar *utf8;
|
||||||
|
|
||||||
|
GST_DEBUG_OBJECT (sub, "converting subtitles from %s to UTF-8", input_enc);
|
||||||
|
utf8 = g_convert (data, len, "UTF-8", input_enc, NULL, NULL, &err);
|
||||||
|
|
||||||
|
if (err != NULL) {
|
||||||
|
GST_WARNING_OBJECT (sub, "conversion to UTF-8 failed : %s", err->message);
|
||||||
|
g_error_free (err);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = gst_buffer_new ();
|
||||||
|
GST_BUFFER_DATA (ret) = (guint8 *) utf8;
|
||||||
|
GST_BUFFER_MALLOCDATA (ret) = (guint8 *) utf8;
|
||||||
|
GST_BUFFER_SIZE (ret) = strlen (utf8);
|
||||||
|
GST_BUFFER_OFFSET (ret) = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
GST_BUFFER_CAPS (ret) = gst_caps_new_simple ("application/x-subtitle", NULL);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
static GstFlowReturn
|
static GstFlowReturn
|
||||||
|
@ -129,7 +177,10 @@ gst_avi_subtitle_parse_gab2_chunk (GstAviSubtitle * sub, GstBuffer * buf)
|
||||||
* assume all the remaining data in the chunk is subtitle data, there may
|
* assume all the remaining data in the chunk is subtitle data, there may
|
||||||
* be padding at the end for some reason, so only parse file_length bytes */
|
* be padding at the end for some reason, so only parse file_length bytes */
|
||||||
sub->subfile =
|
sub->subfile =
|
||||||
gst_avi_subtitle_extract_utf8_file (buf, 17 + name_length, file_length);
|
gst_avi_subtitle_extract_file (sub, buf, 17 + name_length, file_length);
|
||||||
|
|
||||||
|
if (sub->subfile == NULL)
|
||||||
|
goto extract_failed;
|
||||||
|
|
||||||
return GST_FLOW_OK;
|
return GST_FLOW_OK;
|
||||||
|
|
||||||
|
@ -159,6 +210,12 @@ wrong_total_length:
|
||||||
17 + name_length + file_length, size));
|
17 + name_length + file_length, size));
|
||||||
return GST_FLOW_ERROR;
|
return GST_FLOW_ERROR;
|
||||||
}
|
}
|
||||||
|
extract_failed:
|
||||||
|
{
|
||||||
|
GST_ELEMENT_ERROR (sub, STREAM, DECODE, (NULL),
|
||||||
|
("could not extract subtitles"));
|
||||||
|
return GST_FLOW_ERROR;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static GstFlowReturn
|
static GstFlowReturn
|
||||||
|
|
Loading…
Reference in a new issue