diff --git a/gst/multifile/Makefile.am b/gst/multifile/Makefile.am
index e7dea03901..519bd1bfca 100644
--- a/gst/multifile/Makefile.am
+++ b/gst/multifile/Makefile.am
@@ -5,13 +5,14 @@ libgstmultifile_la_SOURCES = \
gstmultifilesink.c \
gstmultifilesrc.c \
gstmultifile.c \
- gstsplitfilesrc.c
+ gstsplitfilesrc.c \
+ patternspec.c
libgstmultifile_la_CFLAGS = $(GST_BASE_CFLAGS) $(GST_CFLAGS) $(GIO_CFLAGS)
libgstmultifile_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) $(GIO_LIBS)
libgstmultifile_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS)
libgstmultifile_la_LIBTOOLFLAGS = --tag=disable-static
-noinst_HEADERS = gstmultifilesrc.h gstmultifilesink.h gstsplitfilesrc.h
+noinst_HEADERS = gstmultifilesrc.h gstmultifilesink.h gstsplitfilesrc.h patternspec.h
Android.mk: Makefile.am $(BUILT_SOURCES)
diff --git a/gst/multifile/gstsplitfilesrc.c b/gst/multifile/gstsplitfilesrc.c
index 3fcdf07629..31259976ea 100644
--- a/gst/multifile/gstsplitfilesrc.c
+++ b/gst/multifile/gstsplitfilesrc.c
@@ -25,17 +25,14 @@
* had to be split into multiple parts due to filesystem file size limitations,
* for example.
*
- * The files to select are chosen via the location property, which takes a
- * regular expression (note: shell-style wildcards will not work). If the
- * location is an absolute path or contains directory components, only the
- * base file name part will be considered a regular expression. The results
- * will be sorted. The location may include directory components, but the
- * regular expression to select the files can only be in the filename part.
+ * The files to select are chosen via the location property, which supports
+ * (and expects) shell-style wildcards (but only for the filename, not for
+ * directories). The results will be sorted.
*
*
* Example launch line
* |[
- * gst-launch splitfilesrc location="/path/to/part-.*.mpg" ! decodebin ! ... \
+ * gst-launch splitfilesrc location="/path/to/part-*.mpg" ! decodebin ! ... \
* ]| Plays the different parts as if they were one single MPEG file.
*
*
@@ -51,9 +48,16 @@
#endif
#include "gstsplitfilesrc.h"
+#include "patternspec.h"
#include
+#ifdef G_OS_WIN32 /* selects the MatchMode handed to pattern_spec_new() */
+#define DEFAULT_PATTERN_MATCH_MODE MATCH_MODE_UTF8 /* Windows: pattern must be valid UTF-8 */
+#else
+#define DEFAULT_PATTERN_MATCH_MODE MATCH_MODE_AUTO /* elsewhere: UTF-8 or raw bytes, detected at run time */
+#endif
+
enum
{
PROP_LOCATION = 1
@@ -105,6 +109,12 @@ gst_split_file_src_base_init (gpointer g_class)
"Tim-Philipp Müller ");
}
+#ifdef G_OS_WIN32 /* WIN32_BLURB is appended to the location property description */
+#define WIN32_BLURB " Location string must be in UTF-8 encoding (on Windows)."
+#else
+#define WIN32_BLURB /* nothing */
+#endif
+
static void
gst_split_file_src_class_init (GstSplitFileSrcClass * klass)
{
@@ -115,16 +125,12 @@ gst_split_file_src_class_init (GstSplitFileSrcClass * klass)
gobject_class->get_property = gst_split_file_src_get_property;
gobject_class->finalize = gst_split_file_src_finalize;
- /* We're using a regular expression here instead of wildcards, because
- * GPatternSpec can only handle UTF-8 and filenames on unix tend to be
- * just bytes and are often ISO-8859-X, and we don't feel like
- * re-inventing GPatternSpec */
g_object_class_install_property (gobject_class, PROP_LOCATION,
g_param_spec_string ("location", "File Location",
- "Regular expression to create file names of the input files. If "
+ "Wildcard pattern to match file names of the input files. If "
"the location is an absolute path or contains directory components, "
- "only the base file name part will be considered a regular "
- "expression. The results will be sorted.",
+ "only the base file name part will be considered for pattern "
+ "matching. The results will be sorted." WIN32_BLURB,
DEFAULT_LOCATION, G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
gstbasesrc_class->start = GST_DEBUG_FUNCPTR (gst_split_file_src_start);
@@ -203,6 +209,12 @@ gst_split_file_src_set_property (GObject * object, guint prop_id,
GST_OBJECT_LOCK (src);
g_free (src->location);
src->location = g_value_dup_string (value);
+#ifdef G_OS_WIN32
+ if (!g_utf8_validate (src->location, -1, NULL)) {
+ g_warning ("splitfilesrc 'location' property must be in UTF-8 "
+ "encoding on Windows");
+ }
+#endif
GST_OBJECT_UNLOCK (src);
break;
default:
@@ -239,10 +251,9 @@ static gchar **
gst_split_file_src_find_files (GstSplitFileSrc * src, const gchar * dirname,
const gchar * basename, GError ** err)
{
+ PatternSpec *pspec;
GPtrArray *files;
- GRegex *regex;
const gchar *name;
- gchar *regex_string;
GDir *dir;
if (dirname == NULL || basename == NULL)
@@ -255,25 +266,20 @@ gst_split_file_src_find_files (GstSplitFileSrc * src, const gchar * dirname,
if (dir == NULL)
return NULL;
- /* we want the filename to be the whole filename, not just some match
- * in the middle of the filename */
- if (g_str_has_suffix (basename, "$"))
- regex_string = g_strdup (basename);
- else
- regex_string = g_strconcat (basename, "$", NULL);
+ if (DEFAULT_PATTERN_MATCH_MODE == MATCH_MODE_UTF8 &&
+ !g_utf8_validate (basename, -1, NULL)) {
+ goto not_utf8;
+ }
- regex = g_regex_new (regex_string, G_REGEX_RAW, (GRegexMatchFlags) 0, err);
- g_free (regex_string);
-
- if (regex == NULL)
- goto regex_fail;
+ /* mode will be AUTO on linux/unix and UTF8 on win32 */
+ pspec = pattern_spec_new (basename, DEFAULT_PATTERN_MATCH_MODE);
files = g_ptr_array_new ();
while ((name = g_dir_read_name (dir))) {
GST_TRACE_OBJECT (src, "check: %s", name);
- if (g_regex_match (regex, name, (GRegexMatchFlags) 0, NULL)) {
- GST_LOG_OBJECT (src, "match: %s", name);
+ if (pattern_match_string (pspec, name)) {
+ GST_DEBUG_OBJECT (src, "match: %s", name);
g_ptr_array_add (files, g_build_filename (dirname, name, NULL));
}
}
@@ -284,7 +290,7 @@ gst_split_file_src_find_files (GstSplitFileSrc * src, const gchar * dirname,
g_ptr_array_sort (files, (GCompareFunc) gst_split_file_src_array_sortfunc);
g_ptr_array_add (files, NULL);
- g_regex_unref (regex);
+ pattern_spec_free (pspec);
g_dir_close (dir);
return (gchar **) g_ptr_array_free (files, FALSE);
@@ -296,21 +302,21 @@ invalid_location:
"No filename specified.");
return NULL;
}
-regex_fail:
+not_utf8:
{
- GST_WARNING_OBJECT (src, "g_regex_new() failed: %s", (*err)->message);
g_dir_close (dir);
+ g_set_error_literal (err, G_FILE_ERROR, G_FILE_ERROR_INVAL,
+ "Filename pattern must be UTF-8 on Windows.");
return NULL;
}
no_matches:
{
- g_regex_unref (regex);
+ pattern_spec_free (pspec);
g_dir_close (dir);
g_set_error_literal (err, G_FILE_ERROR, G_FILE_ERROR_NOENT,
"Found no files matching the pattern.");
return NULL;
}
-
}
static gboolean
diff --git a/gst/multifile/patternspec.c b/gst/multifile/patternspec.c
new file mode 100644
index 0000000000..59de8d1ffb
--- /dev/null
+++ b/gst/multifile/patternspec.c
@@ -0,0 +1,334 @@
+/* GPattern copy that supports raw (non-utf8) matching
+ * based on: GLIB - Library of useful routines for C programming
+ * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "patternspec.h"
+#include
+/* Matching strategy of a compiled pattern; assigned in pattern_spec_new(). */
+typedef enum
+{
+ MATCH_ALL, /* "*A?A*" */
+ MATCH_ALL_TAIL, /* "*A?AA" */
+ MATCH_HEAD, /* "AAAA*" */
+ MATCH_TAIL, /* "*AAAA" */
+ MATCH_EXACT, /* "AAAAA" */
+ MATCH_LAST /* sentinel: every valid match type is below this value */
+} MatchType;
+/* Compiled wildcard pattern, built by pattern_spec_new(). */
+struct _PatternSpec
+{
+ MatchMode match_mode; /* AUTO is resolved to UTF8 or RAW per matched string */
+ MatchType match_type; /* strategy chosen from the pattern layout */
+ guint pattern_length; /* byte length of pattern, excluding the NUL */
+ guint min_length; /* shortest byte length a matching string can have */
+ guint max_length; /* longest byte length; G_MAXUINT once a wildcard was seen */
+ gchar *pattern; /* canonicalized pattern; stored reversed for MATCH_ALL_TAIL */
+};
+/* Return a newly allocated, byte-wise reversed copy of the first size bytes of str. */
+static inline gchar *
+raw_strreverse (const gchar * str, gssize size)
+{
+ g_assert (size > 0); /* an empty input is treated as a programming error */
+ return g_strreverse (g_strndup (str, size)); /* copy first, then reverse the copy in place */
+}
+/* Recursive matcher: TRUE if match_string matches match_pattern; sets *wildcard_reached_p on a wildcard. */
+static inline gboolean
+pattern_ph_match (const gchar * match_pattern, MatchMode match_mode,
+ const gchar * match_string, gboolean * wildcard_reached_p)
+{
+ register const gchar *pattern, *string;
+ register gchar ch;
+
+ pattern = match_pattern;
+ string = match_string;
+
+ ch = *pattern; /* ch holds the pattern byte being processed; pattern points past it */
+ pattern++;
+ while (ch) {
+ switch (ch) {
+ case '?': /* joker: must consume exactly one character */
+ if (!*string)
+ return FALSE;
+ if (match_mode == MATCH_MODE_UTF8)
+ string = g_utf8_next_char (string); /* skip one whole UTF-8 character */
+ else
+ ++string; /* otherwise advance a single byte */
+ break;
+
+ case '*': /* wildcard: matches any run of characters, including none */
+ *wildcard_reached_p = TRUE; /* report to the caller that a wildcard was reached */
+ do { /* absorb following wildcards and jokers; each joker still consumes a character */
+ ch = *pattern;
+ pattern++;
+ if (ch == '?') {
+ if (!*string)
+ return FALSE;
+ if (match_mode == MATCH_MODE_UTF8)
+ string = g_utf8_next_char (string);
+ else
+ ++string;
+ }
+ }
+ while (ch == '*' || ch == '?');
+ if (!ch) /* pattern ends after the wildcard: whatever remains matches */
+ return TRUE;
+ do { /* try every position where the literal ch occurs in the string */
+ gboolean next_wildcard_reached = FALSE;
+ while (ch != *string) { /* scan forward to the next occurrence of ch */
+ if (!*string)
+ return FALSE;
+ if (match_mode == MATCH_MODE_UTF8)
+ string = g_utf8_next_char (string);
+ else
+ ++string;
+ }
+ string++; /* step past the matched literal */
+ if (pattern_ph_match (pattern, match_mode, string,
+ &next_wildcard_reached)) /* match the rest of the pattern from here */
+ return TRUE;
+ if (next_wildcard_reached)
+ /* the forthcoming pattern substring up to the next wildcard has
+ * been matched, but a mismatch occurred for the rest of the
+ * pattern, following the next wildcard.
+ * there's no need to advance the current match position any
+ * further if the rest pattern will not match.
+ */
+ return FALSE;
+ }
+ while (*string);
+ break;
+
+ default: /* literal: must equal the current string byte */
+ if (ch == *string)
+ string++;
+ else
+ return FALSE;
+ break;
+ }
+
+ ch = *pattern; /* fetch the next pattern byte */
+ pattern++;
+ }
+
+ return *string == 0; /* pattern exhausted: match only if the string is exhausted too */
+}
+/* Match string (string_length bytes) against pspec; string_reversed may be NULL. */
+static gboolean
+pattern_match (PatternSpec * pspec, guint string_length,
+ const gchar * string, const gchar * string_reversed)
+{
+ MatchMode match_mode;
+
+ g_assert (pspec != NULL);
+ g_assert (string != NULL);
+
+ if (string_length < pspec->min_length || string_length > pspec->max_length)
+ return FALSE; /* quick reject: length is outside the bounds computed at compile time */
+
+ match_mode = pspec->match_mode;
+ if (match_mode == MATCH_MODE_AUTO) { /* AUTO: decide per candidate string */
+ if (!g_utf8_validate (string, string_length, NULL))
+ match_mode = MATCH_MODE_RAW;
+ else
+ match_mode = MATCH_MODE_UTF8;
+ }
+
+ switch (pspec->match_type) {
+ gboolean dummy; /* receives wildcard_reached_p; its value is not used here */
+ case MATCH_ALL:
+ return pattern_ph_match (pspec->pattern, match_mode, string, &dummy);
+ case MATCH_ALL_TAIL: /* pattern is stored reversed, so match against a reversed string */
+ if (string_reversed)
+ return pattern_ph_match (pspec->pattern, match_mode, string_reversed,
+ &dummy);
+ else {
+ gboolean result;
+ gchar *tmp; /* temporary reversed copy of string, freed below */
+ if (match_mode == MATCH_MODE_UTF8) {
+ tmp = g_utf8_strreverse (string, string_length);
+ } else { /* NOTE(review): AUTO patterns are reversed per UTF-8 char; confirm this agrees */
+ tmp = raw_strreverse (string, string_length);
+ }
+ result = pattern_ph_match (pspec->pattern, match_mode, tmp, &dummy);
+ g_free (tmp);
+ return result;
+ }
+ case MATCH_HEAD: /* compare the leading pattern_length bytes */
+ if (pspec->pattern_length == string_length)
+ return memcmp (pspec->pattern, string, string_length) == 0;
+ else if (pspec->pattern_length)
+ return memcmp (pspec->pattern, string, pspec->pattern_length) == 0;
+ else
+ return TRUE; /* empty remainder: everything matches */
+ case MATCH_TAIL: /* compare the trailing pattern_length bytes */
+ if (pspec->pattern_length)
+ /* compare incl. NUL terminator */
+ return memcmp (pspec->pattern,
+ string + (string_length - pspec->pattern_length),
+ pspec->pattern_length + 1) == 0;
+ else
+ return TRUE; /* empty remainder: everything matches */
+ case MATCH_EXACT: /* lengths and bytes must both be equal */
+ if (pspec->pattern_length != string_length)
+ return FALSE;
+ else
+ return memcmp (pspec->pattern, string, string_length) == 0;
+ default: /* not a valid match type: warn via the precondition macro and fail */
+ g_return_val_if_fail (pspec->match_type < MATCH_LAST, FALSE);
+ return FALSE;
+ }
+}
+/* Compile pattern into a newly allocated PatternSpec; release it with pattern_spec_free(). */
+PatternSpec *
+pattern_spec_new (const gchar * pattern, MatchMode match_mode)
+{
+ PatternSpec *pspec;
+ gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
+ gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1; /* first/last wildcard, first/last joker */
+ gboolean follows_wildcard = FALSE; /* TRUE while no literal has been seen since the last wildcard */
+ guint pending_jokers = 0; /* jokers read but not yet written to the output */
+ const gchar *s; /* read cursor in the input pattern */
+ gchar *d; /* write cursor in the canonicalized pattern */
+ guint i; /* position counter for the canonicalized pattern */
+
+ g_assert (pattern != NULL);
+ g_assert (match_mode != MATCH_MODE_UTF8
+ || g_utf8_validate (pattern, -1, NULL)); /* UTF8 mode requires a valid UTF-8 pattern */
+
+ /* canonicalize pattern and collect necessary stats */
+ pspec = g_new (PatternSpec, 1);
+ pspec->match_mode = match_mode;
+ pspec->pattern_length = strlen (pattern);
+ pspec->min_length = 0;
+ pspec->max_length = 0;
+ pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
+
+ if (pspec->match_mode == MATCH_MODE_AUTO) { /* invalid UTF-8 pattern forces RAW; otherwise stay AUTO */
+ if (!g_utf8_validate (pattern, -1, NULL))
+ pspec->match_mode = MATCH_MODE_RAW;
+ }
+
+ d = pspec->pattern;
+ for (i = 0, s = pattern; *s != 0; s++) {
+ switch (*s) {
+ case '*':
+ if (follows_wildcard) { /* compress multiple wildcards */
+ pspec->pattern_length--;
+ continue;
+ }
+ follows_wildcard = TRUE;
+ if (hw_pos < 0)
+ hw_pos = i;
+ tw_pos = i;
+ break;
+ case '?': /* jokers are deferred and written just before the next literal */
+ pending_jokers++;
+ pspec->min_length++; /* a joker needs at least one byte */
+ if (pspec->match_mode == MATCH_MODE_RAW) {
+ pspec->max_length += 1;
+ } else {
+ pspec->max_length += 4; /* maximum UTF-8 character length */
+ }
+ continue;
+ default:
+ for (; pending_jokers; pending_jokers--, i++) { /* flush deferred jokers ahead of the literal */
+ *d++ = '?';
+ if (hj_pos < 0)
+ hj_pos = i;
+ tj_pos = i;
+ }
+ follows_wildcard = FALSE;
+ pspec->min_length++;
+ pspec->max_length++;
+ break;
+ }
+ *d++ = *s; /* copy the wildcard or literal itself */
+ i++;
+ }
+ for (; pending_jokers; pending_jokers--) { /* flush jokers left over at the end of the pattern */
+ *d++ = '?';
+ if (hj_pos < 0)
+ hj_pos = i;
+ tj_pos = i;
+ }
+ *d++ = 0;
+ seen_joker = hj_pos >= 0;
+ seen_wildcard = hw_pos >= 0;
+ more_wildcards = seen_wildcard && hw_pos != tw_pos;
+ if (seen_wildcard)
+ pspec->max_length = G_MAXUINT; /* a wildcard removes the upper length bound */
+
+ /* special case sole head/tail wildcard or exact matches */
+ if (!seen_joker && !more_wildcards) {
+ if (pspec->pattern[0] == '*') {
+ pspec->match_type = MATCH_TAIL;
+ memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length); /* drop the leading wildcard */
+ pspec->pattern[pspec->pattern_length] = 0;
+ return pspec;
+ }
+ if (pspec->pattern_length > 0 &&
+ pspec->pattern[pspec->pattern_length - 1] == '*') {
+ pspec->match_type = MATCH_HEAD;
+ pspec->pattern[--pspec->pattern_length] = 0; /* drop the trailing wildcard */
+ return pspec;
+ }
+ if (!seen_wildcard) {
+ pspec->match_type = MATCH_EXACT;
+ return pspec;
+ }
+ }
+
+ /* now just need to distinguish between head or tail match start */
+ tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */
+ tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */
+ if (seen_wildcard)
+ pspec->match_type = tw_pos > hw_pos ? MATCH_ALL_TAIL : MATCH_ALL;
+ else /* seen_joker */
+ pspec->match_type = tj_pos > hj_pos ? MATCH_ALL_TAIL : MATCH_ALL;
+ if (pspec->match_type == MATCH_ALL_TAIL) { /* keep the pattern reversed for tail-first matching */
+ gchar *tmp = pspec->pattern;
+
+ if (pspec->match_mode == MATCH_MODE_RAW) {
+ pspec->pattern = raw_strreverse (pspec->pattern, pspec->pattern_length);
+ } else {
+ pspec->pattern =
+ g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
+ }
+ g_free (tmp); /* the un-reversed buffer is no longer needed */
+ }
+ return pspec;
+}
+/* Free a PatternSpec and its pattern buffer; pspec must not be NULL. */
+void
+pattern_spec_free (PatternSpec * pspec)
+{
+ g_assert (pspec != NULL);
+
+ g_free (pspec->pattern);
+ g_free (pspec);
+}
+/* Return TRUE if the NUL-terminated string matches the compiled pattern pspec. */
+gboolean
+pattern_match_string (PatternSpec * pspec, const gchar * string)
+{
+ return pattern_match (pspec, strlen (string), string, NULL); /* no pre-reversed copy is supplied */
+}
diff --git a/gst/multifile/patternspec.h b/gst/multifile/patternspec.h
new file mode 100644
index 0000000000..c3e9436384
--- /dev/null
+++ b/gst/multifile/patternspec.h
@@ -0,0 +1,47 @@
+/* GPattern copy that supports raw (non-utf8) matching
+ * based on: GLIB - Library of useful routines for C programming
+ * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __PATTERN_SPEC_H__
+#define __PATTERN_SPEC_H__
+
+#include
+
+G_BEGIN_DECLS
+/* How pattern and candidate strings are interpreted: as UTF-8 text or as raw bytes. */
+typedef enum
+{
+ MATCH_MODE_AUTO = 0, /* UTF-8 when the data validates as UTF-8, raw bytes otherwise */
+ MATCH_MODE_UTF8, /* a joker consumes one UTF-8 character; pattern must be valid UTF-8 */
+ MATCH_MODE_RAW /* a joker consumes one byte */
+} MatchMode;
+
+typedef struct _PatternSpec PatternSpec; /* opaque compiled pattern */
+
+PatternSpec * pattern_spec_new (const gchar * pattern,
+ MatchMode match_mode); /* compiles pattern; free the result with pattern_spec_free() */
+
+void pattern_spec_free (PatternSpec * pspec); /* pspec must not be NULL */
+
+gboolean pattern_match_string (PatternSpec * pspec,
+ const gchar * string); /* TRUE if the NUL-terminated string matches pspec */
+
+G_END_DECLS
+
+#endif /* __PATTERN_SPEC_H__ */