mirror of
https://gitlab.freedesktop.org/gstreamer/gstreamer.git
synced 2025-01-27 09:38:17 +00:00
gst/matroska/: Try to fix up broken matroska files containing subtitle streams with non-UTF8 character encodings (cou...
Original commit message from CVS: * gst/matroska/matroska-demux.c: (gst_matroska_demux_check_subtitle_buffer), (gst_matroska_demux_parse_blockgroup_or_simpleblock), (gst_matroska_demux_subtitle_caps): * gst/matroska/matroska-ids.c: (gst_matroska_track_init_subtitle_context): * gst/matroska/matroska-ids.h: Try to fix up broken matroska files containing subtitle streams with non-UTF8 character encodings (courtesy of mkvmerge) using either the encoding specified in the GST_SUBTITLE_ENCODING environment variable or the current locale's character set if it is non-UTF8. Fixes #337076.
This commit is contained in:
parent
a6af52cc25
commit
45c10ca9de
4 changed files with 100 additions and 2 deletions
16
ChangeLog
16
ChangeLog
|
@ -1,3 +1,19 @@
|
|||
2006-06-22 Tim-Philipp Müller <tim at centricular dot net>
|
||||
|
||||
* gst/matroska/matroska-demux.c:
|
||||
(gst_matroska_demux_check_subtitle_buffer),
|
||||
(gst_matroska_demux_parse_blockgroup_or_simpleblock),
|
||||
(gst_matroska_demux_subtitle_caps):
|
||||
* gst/matroska/matroska-ids.c:
|
||||
(gst_matroska_track_init_subtitle_context):
|
||||
* gst/matroska/matroska-ids.h:
|
||||
Try to fix up broken matroska files containing subtitle
|
||||
streams with non-UTF8 character encodings (courtesy of
|
||||
mkvmerge) using either the encoding specified in the
|
||||
GST_SUBTITLE_ENCODING environment variable or the
|
||||
current locale's character set if it is non-UTF8.
|
||||
Fixes #337076.
|
||||
|
||||
2006-06-22 Tim-Philipp Müller <tim at centricular dot net>
|
||||
|
||||
* gst/id3demux/id3v2frames.c: (parse_picture_frame):
|
||||
|
|
|
@ -36,7 +36,7 @@
|
|||
#include "matroska-demux.h"
|
||||
#include "matroska-ids.h"
|
||||
|
||||
GST_DEBUG_CATEGORY (matroskademux_debug);
|
||||
GST_DEBUG_CATEGORY_STATIC (matroskademux_debug);
|
||||
#define GST_CAT_DEFAULT matroskademux_debug
|
||||
|
||||
enum
|
||||
|
@ -2135,6 +2135,75 @@ gst_matroska_demux_add_wvpk_header (GstMatroskaTrackContext * stream,
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
static GstBuffer *
|
||||
gst_matroska_demux_check_subtitle_buffer (GstMatroskaDemux * demux,
|
||||
GstMatroskaTrackContext * stream, GstBuffer * buf)
|
||||
{
|
||||
GstMatroskaTrackSubtitleContext *sub_stream;
|
||||
const gchar *encoding, *data;
|
||||
GError *err = NULL;
|
||||
GstBuffer *newbuf;
|
||||
gchar *utf8;
|
||||
guint size;
|
||||
|
||||
sub_stream = (GstMatroskaTrackSubtitleContext *) stream;
|
||||
|
||||
if (!sub_stream->check_utf8)
|
||||
return buf;
|
||||
|
||||
data = (const gchar *) GST_BUFFER_DATA (buf);
|
||||
size = GST_BUFFER_SIZE (buf);
|
||||
|
||||
if (!sub_stream->invalid_utf8) {
|
||||
if (g_utf8_validate (data, size, NULL)) {
|
||||
return buf;
|
||||
}
|
||||
GST_WARNING_OBJECT (demux, "subtitle stream %d is not valid UTF-8, this "
|
||||
"is broken according to the matroska specification", stream->num);
|
||||
sub_stream->invalid_utf8 = TRUE;
|
||||
}
|
||||
|
||||
/* file with broken non-UTF8 subtitle, do the best we can do to fix it */
|
||||
encoding = g_getenv ("GST_SUBTITLE_ENCODING");
|
||||
if (encoding == NULL || *encoding == '\0') {
|
||||
/* if local encoding is UTF-8 and no encoding specified
|
||||
* via the environment variable, assume ISO-8859-15 */
|
||||
if (g_get_charset (&encoding)) {
|
||||
encoding = "ISO-8859-15";
|
||||
}
|
||||
}
|
||||
|
||||
utf8 = g_convert_with_fallback (data, size, "UTF-8", encoding, "*",
|
||||
NULL, NULL, &err);
|
||||
|
||||
if (err) {
|
||||
GST_LOG_OBJECT (demux, "could not convert string from '%s' to UTF-8: %s",
|
||||
encoding, err->message);
|
||||
g_error_free (err);
|
||||
g_free (utf8);
|
||||
|
||||
/* invalid input encoding, fall back to ISO-8859-15 (always succeeds) */
|
||||
encoding = "ISO-8859-15";
|
||||
utf8 = g_convert_with_fallback (data, size, "UTF-8", encoding, "*",
|
||||
NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
GST_LOG_OBJECT (demux, "converted subtitle text from %s to UTF-8 %s",
|
||||
encoding, (err) ? "(using ISO-8859-15 as fallback)" : "");
|
||||
|
||||
if (utf8 == NULL)
|
||||
utf8 = g_strdup ("invalid subtitle");
|
||||
|
||||
newbuf = gst_buffer_new ();
|
||||
GST_BUFFER_MALLOCDATA (newbuf) = (guint8 *) utf8;
|
||||
GST_BUFFER_DATA (newbuf) = (guint8 *) utf8;
|
||||
GST_BUFFER_SIZE (newbuf) = strlen (utf8);
|
||||
gst_buffer_stamp (newbuf, buf);
|
||||
|
||||
gst_buffer_unref (buf);
|
||||
return newbuf;
|
||||
}
|
||||
|
||||
static gboolean
|
||||
gst_matroska_demux_parse_blockgroup_or_simpleblock (GstMatroskaDemux * demux,
|
||||
guint64 cluster_time, gboolean is_simpleblock)
|
||||
|
@ -2415,6 +2484,12 @@ gst_matroska_demux_parse_blockgroup_or_simpleblock (GstMatroskaDemux * demux,
|
|||
GST_TIME_ARGS (GST_BUFFER_DURATION (sub)));
|
||||
|
||||
gst_buffer_set_caps (sub, GST_PAD_CAPS (stream->pad));
|
||||
|
||||
/* Fix up broken files with subtitles that are not UTF8 */
|
||||
if (stream->type == GST_MATROSKA_TRACK_TYPE_SUBTITLE) {
|
||||
sub = gst_matroska_demux_check_subtitle_buffer (demux, stream, sub);
|
||||
}
|
||||
|
||||
ret = gst_pad_push (stream->pad, sub);
|
||||
if (ret != GST_FLOW_OK && ret != GST_FLOW_NOT_LINKED)
|
||||
got_error = TRUE;
|
||||
|
@ -3448,15 +3523,20 @@ gst_matroska_demux_subtitle_caps (GstMatroskaTrackSubtitleContext *
|
|||
|
||||
if (!strcmp (codec_id, GST_MATROSKA_CODEC_ID_SUBTITLE_UTF8)) {
|
||||
caps = gst_caps_new_simple ("text/plain", NULL);
|
||||
subtitlecontext->check_utf8 = TRUE;
|
||||
} else if (!strcmp (codec_id, GST_MATROSKA_CODEC_ID_SUBTITLE_SSA)) {
|
||||
caps = gst_caps_new_simple ("application/x-ssa", NULL);
|
||||
subtitlecontext->check_utf8 = TRUE;
|
||||
} else if (!strcmp (codec_id, GST_MATROSKA_CODEC_ID_SUBTITLE_ASS)) {
|
||||
caps = gst_caps_new_simple ("application/x-ass", NULL);
|
||||
subtitlecontext->check_utf8 = TRUE;
|
||||
} else if (!strcmp (codec_id, GST_MATROSKA_CODEC_ID_SUBTITLE_USF)) {
|
||||
caps = gst_caps_new_simple ("application/x-usf", NULL);
|
||||
subtitlecontext->check_utf8 = TRUE;
|
||||
} else {
|
||||
GST_DEBUG ("Unknown subtitle stream: codec_id='%s'", codec_id);
|
||||
caps = gst_caps_new_simple ("application/x-subtitle-unknown", NULL);
|
||||
subtitlecontext->check_utf8 = FALSE;
|
||||
}
|
||||
|
||||
if (data != NULL && size > 0) {
|
||||
|
|
|
@ -105,6 +105,7 @@ gst_matroska_track_init_subtitle_context (GstMatroskaTrackContext ** p_context)
|
|||
*p_context = (GstMatroskaTrackContext *) subtitle_context;
|
||||
|
||||
(*p_context)->type = GST_MATROSKA_TRACK_TYPE_SUBTITLE;
|
||||
subtitle_context->invalid_utf8 = FALSE;
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
|
|
|
@ -284,7 +284,8 @@ typedef struct _GstMatroskaTrackComplexContext {
|
|||
typedef struct _GstMatroskaTrackSubtitleContext {
|
||||
GstMatroskaTrackContext parent;
|
||||
|
||||
/* or here... */
|
||||
gboolean check_utf8; /* buffers should be valid UTF-8 */
|
||||
gboolean invalid_utf8; /* work around broken files */
|
||||
} GstMatroskaTrackSubtitleContext;
|
||||
|
||||
typedef struct _GstMatroskaIndex {
|
||||
|
|
Loading…
Reference in a new issue