From 088c7c07a2525fe83f2c7ffd77b79f4e9db19fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim-Philipp=20M=C3=BCller?= Date: Fri, 11 Dec 2009 23:59:54 +0000 Subject: [PATCH] tag: add some utility functions for language codes and tags Add some utility functions for language tags and ISO-639 codes. These are useful for both GUIs and elements. The iso-codes package is used for language name translations if available. API: gst_tag_get_language_codes() API: gst_tag_get_language_name() API: gst_tag_get_language_code() API: gst_tag_get_language_code_iso_639_1() API: gst_tag_get_language_code_iso_639_2B() API: gst_tag_get_language_code_iso_639_2T() --- .gitignore | 2 + configure.ac | 53 ++ docs/libs/gst-plugins-base-libs-docs.sgml | 1 + docs/libs/gst-plugins-base-libs-sections.txt | 12 + gst-libs/gst/tag/Makefile.am | 16 +- gst-libs/gst/tag/lang-tables.c | 447 +++++++++++++++++ gst-libs/gst/tag/lang.c | 491 +++++++++++++++++++ gst-libs/gst/tag/mklangtables.c | 239 +++++++++ gst-libs/gst/tag/tag.h | 24 + tests/check/libs/tag.c | 53 +- win32/common/libgsttag.def | 5 + 11 files changed, 1341 insertions(+), 2 deletions(-) create mode 100644 gst-libs/gst/tag/lang-tables.c create mode 100644 gst-libs/gst/tag/lang.c create mode 100644 gst-libs/gst/tag/mklangtables.c diff --git a/.gitignore b/.gitignore index 3466709752..e07bc131c4 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,5 @@ Makefile.in Makefile *.gir *.typelib + +gst-libs/gst/tag/mklangtables diff --git a/configure.ac b/configure.ac index bc3e66bf65..e461d8668d 100644 --- a/configure.ac +++ b/configure.ac @@ -348,6 +348,56 @@ if test "x$HAVE_SYS_SOCKET_H" != "xyes"; then AG_GST_DISABLE_PLUGIN(tcp) fi +dnl iso-codes is optional, used by libgsttag +AC_ARG_ENABLE(iso-codes, +AC_HELP_STRING([--enable-iso-codes],[use iso-codes if installed]), +[case "${enableval}" in + yes) enable_iso_codes=yes ;; + no) enable_iso_codes=no ;; + *) AC_MSG_ERROR(bad value ${enableval} for --enable-iso-codes) ;; +esac +], 
+[enable_iso_codes=yes]) dnl Default value + +AC_MSG_CHECKING([whether to use iso-codes if they are available]) +if test "x$enable_iso_codes" = "xyes"; then + AC_MSG_RESULT([yes]) + have_iso_codes=no + AC_MSG_CHECKING([whether iso-codes are available on this system]) + if $PKG_CONFIG iso-codes; then + AC_MSG_RESULT([yes]) + AC_MSG_CHECKING([whether iso-codes has iso-639 domain]) + if $PKG_CONFIG --variable=domains iso-codes | $GREP -q 639 ; then + AC_MSG_RESULT([yes]) + AC_MSG_CHECKING([for iso-codes prefix]) + ISO_CODES_PREFIX=`$PKG_CONFIG --variable=prefix iso-codes` + if test -d "$ISO_CODES_PREFIX"; then + AC_MSG_RESULT([yes]) + AC_MSG_CHECKING([ISO_CODES_PREFIX]) + AC_MSG_RESULT([$ISO_CODES_PREFIX]) + ISO_639_DOMAIN="iso_639" + AC_MSG_CHECKING([ISO_639_DOMAIN]) + AC_MSG_RESULT([$ISO_639_DOMAIN]) + have_iso_codes=yes + AC_DEFINE([HAVE_ISO_CODES], [1], [make use of iso-codes for ISO-639]) + AC_DEFINE_UNQUOTED([ISO_CODES_PREFIX], ["$ISO_CODES_PREFIX"], [prefix]) + ISO_CODES_VERSION=`$PKG_CONFIG --modversion iso-codes` + AC_DEFINE_UNQUOTED([ISO_CODES_VERSION], ["$ISO_CODES_VERSION"], [ ]) + else + AC_MSG_RESULT([no]) + fi + else + AC_MSG_RESULT([no]) + fi + else + AC_MSG_RESULT([no]) + fi + AM_CONDITIONAL(USE_ISO_CODES, test "x$have_iso_codes" = "xyes") +else + AC_MSG_RESULT([no (disabled via --disable-iso-codes)]) + AM_CONDITIONAL(USE_ISO_CODES, false) +fi + dnl *** sys plug-ins *** echo @@ -847,6 +897,7 @@ sed \ -e 's/.* HAVE_CPU_I386$/#define HAVE_CPU_I386 1/' \ -e 's/.* HAVE_FGETPOS$/#define HAVE_FGETPOS 1/' \ -e 's/.* HAVE_FSETPOS$/#define HAVE_FSETPOS 1/' \ + -e 's/.* HAVE_ISO_CODES$/#undef HAVE_ISO_CODES/' \ -e 's/.* HAVE_LIBXML2$/#define HAVE_LIBXML2 1/' \ -e 's/.* HAVE_PROCESS_H$/#define HAVE_PROCESS_H 1/' \ -e 's/.* HAVE_STDLIB_H$/#define HAVE_STDLIB_H 1/' \ @@ -856,6 +907,8 @@ sed \ -e 's/.* HAVE_WIN32$/#define HAVE_WIN32 1/' \ -e 's/.* HAVE_WINSOCK2_H$/#define HAVE_WINSOCK2_H 1/' \ -e 's/.* HOST_CPU$/#define HOST_CPU "i686"/' \ + -e 's/.* 
ISO_CODES_PREFIX$/#undef ISO_CODES_PREFIX/' \ + -e 's/.* ISO_CODES_VERSION$/#undef ISO_CODES_VERSION/' \ -e 's/.* LIBDIR$/#ifdef _DEBUG\n# define LIBDIR PREFIX "\\\\debug\\\\lib"\n#else\n# define LIBDIR PREFIX "\\\\lib"\n#endif/' \ -e 's/.* LOCALEDIR$/#define LOCALEDIR PREFIX "\\\\share\\\\locale"/' \ -e "s/.* PACKAGE$/#define PACKAGE \"$PACKAGE\"/" \ diff --git a/docs/libs/gst-plugins-base-libs-docs.sgml b/docs/libs/gst-plugins-base-libs-docs.sgml index 8b3861a9e2..4263497f88 100644 --- a/docs/libs/gst-plugins-base-libs-docs.sgml +++ b/docs/libs/gst-plugins-base-libs-docs.sgml @@ -192,6 +192,7 @@ + diff --git a/docs/libs/gst-plugins-base-libs-sections.txt b/docs/libs/gst-plugins-base-libs-sections.txt index 5684a7ea6c..7574a4c252 100644 --- a/docs/libs/gst-plugins-base-libs-sections.txt +++ b/docs/libs/gst-plugins-base-libs-sections.txt @@ -1564,6 +1564,18 @@ GstTagDemuxClass GstTagDemuxResult +
+gsttaglanguagecodes +gst/tag/tag.h + +gst_tag_get_language_codes +gst_tag_get_language_name +gst_tag_get_language_code +gst_tag_get_language_code_iso_639_1 +gst_tag_get_language_code_iso_639_2B +gst_tag_get_language_code_iso_639_2T +
+ # base utils
diff --git a/gst-libs/gst/tag/Makefile.am b/gst-libs/gst/tag/Makefile.am index b5ee2aae29..3694e9c7b3 100644 --- a/gst-libs/gst/tag/Makefile.am +++ b/gst-libs/gst/tag/Makefile.am @@ -6,7 +6,7 @@ libgsttaginclude_HEADERS = \ lib_LTLIBRARIES = libgsttag-@GST_MAJORMINOR@.la -libgsttag_@GST_MAJORMINOR@_la_SOURCES = gstvorbistag.c gstid3tag.c tags.c gsttagdemux.c +libgsttag_@GST_MAJORMINOR@_la_SOURCES = gstvorbistag.c gstid3tag.c lang.c tags.c gsttagdemux.c libgsttag_@GST_MAJORMINOR@_la_CFLAGS = $(GST_PLUGINS_BASE_CFLAGS) $(GST_BASE_CFLAGS) $(GST_CFLAGS) libgsttag_@GST_MAJORMINOR@_la_LIBADD = $(GST_BASE_LIBS) $(GST_LIBS) libgsttag_@GST_MAJORMINOR@_la_LDFLAGS = $(GST_LIB_LDFLAGS) $(GST_ALL_LDFLAGS) $(GST_LT_LDFLAGS) @@ -49,3 +49,17 @@ typelibs_DATA = $(BUILT_GIRSOURCES:.gir=.typelib) CLEANFILES = $(BUILT_GIRSOURCES) $(typelibs_DATA) endif + +# little program that reads iso_639.xml and outputs tables for us as fallback +# for when iso-codes are not available (and so we don't have to read the xml +# just to map codes) +if USE_ISO_CODES +ISO_CODE_PROGS = mklangtables +mklangtables_SOURCES = mklangtables.c +mklangtables_CFLAGS = $(GST_CFLAGS) +mklangtables_LDADD = $(GST_LIBS) +else +ISO_CODE_PROGS = +endif + +noinst_PROGRAMS = $(ISO_CODE_PROGS) diff --git a/gst-libs/gst/tag/lang-tables.c b/gst-libs/gst/tag/lang-tables.c new file mode 100644 index 0000000000..f6e9127dbe --- /dev/null +++ b/gst-libs/gst/tag/lang-tables.c @@ -0,0 +1,447 @@ +/* generated by mklangtables.c iso-codes 3.12 */ + +#include + +#define ISO_639_FLAG_2T (1 << 0) +#define ISO_639_FLAG_2B (1 << 1) + +/* *INDENT-OFF* */ + +static const struct +{ + const gchar iso_639_1[3]; + const gchar iso_639_2[4]; + guint8 flags; + guint16 name_offset; +} iso_639_codes[] = { + /* Afar */ + { "aa", "aar", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 0 }, + /* Abkhazian */ + { "ab", "abk", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 5 }, + /* Avestan */ + { "ae", "ave", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 15 }, + /* Afrikaans */ + { "af", 
"afr", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 23 }, + /* Akan */ + { "ak", "aka", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 33 }, + /* Amharic */ + { "am", "amh", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 38 }, + /* Aragonese */ + { "an", "arg", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 46 }, + /* Arabic */ + { "ar", "ara", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 56 }, + /* Assamese */ + { "as", "asm", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 63 }, + /* Avaric */ + { "av", "ava", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 72 }, + /* Aymara */ + { "ay", "aym", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 79 }, + /* Azerbaijani */ + { "az", "aze", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 86 }, + /* Bashkir */ + { "ba", "bak", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 98 }, + /* Belarusian */ + { "be", "bel", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 106 }, + /* Bulgarian */ + { "bg", "bul", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 117 }, + /* Bihari languages */ + { "bh", "bih", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 127 }, + /* Bislama */ + { "bi", "bis", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 144 }, + /* Bambara */ + { "bm", "bam", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 152 }, + /* Bengali */ + { "bn", "ben", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 160 }, + /* Tibetan */ + { "bo", "bod", ISO_639_FLAG_2T, 168 }, + { "bo", "tib", ISO_639_FLAG_2B, 168 }, + /* Breton */ + { "br", "bre", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 176 }, + /* Bosnian */ + { "bs", "bos", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 183 }, + /* Catalan; Valencian */ + { "ca", "cat", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 191 }, + /* Chechen */ + { "ce", "che", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 210 }, + /* Chamorro */ + { "ch", "cha", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 218 }, + /* Corsican */ + { "co", "cos", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 227 }, + /* Cree */ + { "cr", "cre", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 236 }, + /* Czech */ + { "cs", "ces", ISO_639_FLAG_2T, 241 }, + { "cs", "cze", ISO_639_FLAG_2B, 241 }, + /* Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic */ + { 
"cu", "chu", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 247 }, + /* Chuvash */ + { "cv", "chv", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 328 }, + /* Welsh */ + { "cy", "cym", ISO_639_FLAG_2T, 336 }, + { "cy", "wel", ISO_639_FLAG_2B, 336 }, + /* Danish */ + { "da", "dan", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 342 }, + /* German */ + { "de", "deu", ISO_639_FLAG_2T, 349 }, + { "de", "ger", ISO_639_FLAG_2B, 349 }, + /* Divehi; Dhivehi; Maldivian */ + { "dv", "div", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 356 }, + /* Dzongkha */ + { "dz", "dzo", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 383 }, + /* Ewe */ + { "ee", "ewe", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 392 }, + /* Greek, Modern (1453-) */ + { "el", "ell", ISO_639_FLAG_2T, 396 }, + { "el", "gre", ISO_639_FLAG_2B, 396 }, + /* English */ + { "en", "eng", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 418 }, + /* Esperanto */ + { "eo", "epo", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 426 }, + /* Spanish; Castilian */ + { "es", "spa", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 436 }, + /* Estonian */ + { "et", "est", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 455 }, + /* Basque */ + { "eu", "eus", ISO_639_FLAG_2T, 464 }, + { "eu", "baq", ISO_639_FLAG_2B, 464 }, + /* Persian */ + { "fa", "fas", ISO_639_FLAG_2T, 471 }, + { "fa", "per", ISO_639_FLAG_2B, 471 }, + /* Fulah */ + { "ff", "ful", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 479 }, + /* Finnish */ + { "fi", "fin", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 485 }, + /* Fijian */ + { "fj", "fij", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 493 }, + /* Faroese */ + { "fo", "fao", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 500 }, + /* French */ + { "fr", "fra", ISO_639_FLAG_2T, 508 }, + { "fr", "fre", ISO_639_FLAG_2B, 508 }, + /* Western Frisian */ + { "fy", "fry", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 515 }, + /* Irish */ + { "ga", "gle", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 531 }, + /* Gaelic; Scottish Gaelic */ + { "gd", "gla", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 537 }, + /* Galician */ + { "gl", "glg", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 561 }, + /* Guarani */ + { 
"gn", "grn", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 570 }, + /* Gujarati */ + { "gu", "guj", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 578 }, + /* Manx */ + { "gv", "glv", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 587 }, + /* Hausa */ + { "ha", "hau", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 592 }, + /* Hebrew */ + { "he", "heb", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 598 }, + /* Hindi */ + { "hi", "hin", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 605 }, + /* Hiri Motu */ + { "ho", "hmo", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 611 }, + /* Croatian */ + { "hr", "hrv", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 621 }, + /* Haitian; Haitian Creole */ + { "ht", "hat", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 630 }, + /* Hungarian */ + { "hu", "hun", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 654 }, + /* Armenian */ + { "hy", "hye", ISO_639_FLAG_2T, 664 }, + { "hy", "arm", ISO_639_FLAG_2B, 664 }, + /* Herero */ + { "hz", "her", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 673 }, + /* Interlingua (International Auxiliary Language Association) */ + { "ia", "ina", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 680 }, + /* Indonesian */ + { "id", "ind", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 739 }, + /* Interlingue; Occidental */ + { "ie", "ile", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 750 }, + /* Igbo */ + { "ig", "ibo", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 774 }, + /* Sichuan Yi; Nuosu */ + { "ii", "iii", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 779 }, + /* Inupiaq */ + { "ik", "ipk", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 797 }, + /* Ido */ + { "io", "ido", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 805 }, + /* Icelandic */ + { "is", "isl", ISO_639_FLAG_2T, 809 }, + { "is", "ice", ISO_639_FLAG_2B, 809 }, + /* Italian */ + { "it", "ita", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 819 }, + /* Inuktitut */ + { "iu", "iku", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 827 }, + /* Japanese */ + { "ja", "jpn", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 837 }, + /* Javanese */ + { "jv", "jav", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 846 }, + /* Georgian */ + { "ka", "kat", ISO_639_FLAG_2T, 855 }, + { "ka", "geo", 
ISO_639_FLAG_2B, 855 }, + /* Kongo */ + { "kg", "kon", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 864 }, + /* Kikuyu; Gikuyu */ + { "ki", "kik", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 870 }, + /* Kuanyama; Kwanyama */ + { "kj", "kua", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 885 }, + /* Kazakh */ + { "kk", "kaz", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 904 }, + /* Kalaallisut; Greenlandic */ + { "kl", "kal", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 911 }, + /* Central Khmer */ + { "km", "khm", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 936 }, + /* Kannada */ + { "kn", "kan", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 950 }, + /* Korean */ + { "ko", "kor", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 958 }, + /* Kanuri */ + { "kr", "kau", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 965 }, + /* Kashmiri */ + { "ks", "kas", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 972 }, + /* Kurdish */ + { "ku", "kur", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 981 }, + /* Komi */ + { "kv", "kom", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 989 }, + /* Cornish */ + { "kw", "cor", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 994 }, + /* Kirghiz; Kyrgyz */ + { "ky", "kir", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1002 }, + /* Latin */ + { "la", "lat", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1018 }, + /* Luxembourgish; Letzeburgesch */ + { "lb", "ltz", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1024 }, + /* Ganda */ + { "lg", "lug", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1053 }, + /* Limburgan; Limburger; Limburgish */ + { "li", "lim", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1059 }, + /* Lingala */ + { "ln", "lin", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1092 }, + /* Lao */ + { "lo", "lao", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1100 }, + /* Lithuanian */ + { "lt", "lit", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1104 }, + /* Luba-Katanga */ + { "lu", "lub", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1115 }, + /* Latvian */ + { "lv", "lav", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1128 }, + /* Malagasy */ + { "mg", "mlg", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1136 }, + /* Marshallese */ + { "mh", "mah", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 
1145 }, + /* Maori */ + { "mi", "mri", ISO_639_FLAG_2T, 1157 }, + { "mi", "mao", ISO_639_FLAG_2B, 1157 }, + /* Macedonian */ + { "mk", "mkd", ISO_639_FLAG_2T, 1163 }, + { "mk", "mac", ISO_639_FLAG_2B, 1163 }, + /* Malayalam */ + { "ml", "mal", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1174 }, + /* Mongolian */ + { "mn", "mon", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1184 }, + /* Moldavian; Moldovan */ + { "mo", "mol", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1194 }, + /* Marathi */ + { "mr", "mar", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1214 }, + /* Malay */ + { "ms", "msa", ISO_639_FLAG_2T, 1222 }, + { "ms", "may", ISO_639_FLAG_2B, 1222 }, + /* Maltese */ + { "mt", "mlt", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1228 }, + /* Burmese */ + { "my", "mya", ISO_639_FLAG_2T, 1236 }, + { "my", "bur", ISO_639_FLAG_2B, 1236 }, + /* Nauru */ + { "na", "nau", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1244 }, + /* Bokmål, Norwegian; Norwegian Bokmål */ + { "nb", "nob", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1250 }, + /* Ndebele, North; North Ndebele */ + { "nd", "nde", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1288 }, + /* Nepali */ + { "ne", "nep", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1318 }, + /* Ndonga */ + { "ng", "ndo", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1325 }, + /* Dutch; Flemish */ + { "nl", "nld", ISO_639_FLAG_2T, 1332 }, + { "nl", "dut", ISO_639_FLAG_2B, 1332 }, + /* Norwegian Nynorsk; Nynorsk, Norwegian */ + { "nn", "nno", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1347 }, + /* Norwegian */ + { "no", "nor", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1385 }, + /* Ndebele, South; South Ndebele */ + { "nr", "nbl", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1395 }, + /* Navajo; Navaho */ + { "nv", "nav", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1425 }, + /* Chichewa; Chewa; Nyanja */ + { "ny", "nya", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1440 }, + /* Occitan (post 1500) */ + { "oc", "oci", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1464 }, + /* Ojibwa */ + { "oj", "oji", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1484 }, + /* Oromo */ + { "om", "orm", 
ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1491 }, + /* Oriya */ + { "or", "ori", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1497 }, + /* Ossetian; Ossetic */ + { "os", "oss", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1503 }, + /* Panjabi; Punjabi */ + { "pa", "pan", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1521 }, + /* Pali */ + { "pi", "pli", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1538 }, + /* Polish */ + { "pl", "pol", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1543 }, + /* Pushto; Pashto */ + { "ps", "pus", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1550 }, + /* Portuguese */ + { "pt", "por", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1565 }, + /* Quechua */ + { "qu", "que", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1576 }, + /* Romansh */ + { "rm", "roh", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1584 }, + /* Rundi */ + { "rn", "run", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1592 }, + /* Romanian */ + { "ro", "ron", ISO_639_FLAG_2T, 1598 }, + { "ro", "rum", ISO_639_FLAG_2B, 1598 }, + /* Russian */ + { "ru", "rus", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1607 }, + /* Kinyarwanda */ + { "rw", "kin", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1615 }, + /* Sanskrit */ + { "sa", "san", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1627 }, + /* Sardinian */ + { "sc", "srd", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1636 }, + /* Sindhi */ + { "sd", "snd", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1646 }, + /* Northern Sami */ + { "se", "sme", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1653 }, + /* Sango */ + { "sg", "sag", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1667 }, + /* Sinhala; Sinhalese */ + { "si", "sin", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1673 }, + /* Slovak */ + { "sk", "slk", ISO_639_FLAG_2T, 1692 }, + { "sk", "slo", ISO_639_FLAG_2B, 1692 }, + /* Slovenian */ + { "sl", "slv", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1699 }, + /* Samoan */ + { "sm", "smo", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1709 }, + /* Shona */ + { "sn", "sna", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1716 }, + /* Somali */ + { "so", "som", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1722 }, + /* Albanian */ + { "sq", "sqi", 
ISO_639_FLAG_2T, 1729 }, + { "sq", "alb", ISO_639_FLAG_2B, 1729 }, + /* Serbian */ + { "sr", "srp", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1738 }, + /* Swati */ + { "ss", "ssw", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1746 }, + /* Sotho, Southern */ + { "st", "sot", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1752 }, + /* Sundanese */ + { "su", "sun", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1768 }, + /* Swedish */ + { "sv", "swe", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1778 }, + /* Swahili */ + { "sw", "swa", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1786 }, + /* Tamil */ + { "ta", "tam", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1794 }, + /* Telugu */ + { "te", "tel", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1800 }, + /* Tajik */ + { "tg", "tgk", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1807 }, + /* Thai */ + { "th", "tha", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1813 }, + /* Tigrinya */ + { "ti", "tir", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1818 }, + /* Turkmen */ + { "tk", "tuk", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1827 }, + /* Tagalog */ + { "tl", "tgl", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1835 }, + /* Tswana */ + { "tn", "tsn", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1843 }, + /* Tonga (Tonga Islands) */ + { "to", "ton", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1850 }, + /* Turkish */ + { "tr", "tur", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1872 }, + /* Tsonga */ + { "ts", "tso", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1880 }, + /* Tatar */ + { "tt", "tat", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1887 }, + /* Twi */ + { "tw", "twi", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1893 }, + /* Tahitian */ + { "ty", "tah", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1897 }, + /* Uighur; Uyghur */ + { "ug", "uig", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1906 }, + /* Ukrainian */ + { "uk", "ukr", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1921 }, + /* Urdu */ + { "ur", "urd", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1931 }, + /* Uzbek */ + { "uz", "uzb", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1936 }, + /* Venda */ + { "ve", "ven", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1942 }, + /* Vietnamese */ + { 
"vi", "vie", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1948 }, + /* Volapük */ + { "vo", "vol", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1959 }, + /* Walloon */ + { "wa", "wln", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1968 }, + /* Wolof */ + { "wo", "wol", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1976 }, + /* Xhosa */ + { "xh", "xho", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1982 }, + /* Yiddish */ + { "yi", "yid", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1988 }, + /* Yoruba */ + { "yo", "yor", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 1996 }, + /* Zhuang; Chuang */ + { "za", "zha", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 2003 }, + /* Chinese */ + { "zh", "zho", ISO_639_FLAG_2T, 2018 }, + { "zh", "chi", ISO_639_FLAG_2B, 2018 }, + /* Zulu */ + { "zu", "zul", ISO_639_FLAG_2T | ISO_639_FLAG_2B, 2026 }, +}; + +const gchar iso_639_names[] = + "Afar\000Abkhazian\000Avestan\000Afrikaans\000Akan\000Amharic\000Aragonese" + "\000Arabic\000Assamese\000Avaric\000Aymara\000Azerbaijani\000Bashkir\000B" + "elarusian\000Bulgarian\000Bihari languages\000Bislama\000Bambara\000Benga" + "li\000Tibetan\000Breton\000Bosnian\000Catalan; Valencian\000Chechen\000Ch" + "amorro\000Corsican\000Cree\000Czech\000Church Slavic; Old Slavonic; Churc" + "h Slavonic; Old Bulgarian; Old Church Slavonic\000Chuvash\000Welsh\000Dan" + "ish\000German\000Divehi; Dhivehi; Maldivian\000Dzongkha\000Ewe\000Greek, " + "Modern (1453-)\000English\000Esperanto\000Spanish; Castilian\000Estonian" + "\000Basque\000Persian\000Fulah\000Finnish\000Fijian\000Faroese\000French" + "\000Western Frisian\000Irish\000Gaelic; Scottish Gaelic\000Galician\000Gu" + "arani\000Gujarati\000Manx\000Hausa\000Hebrew\000Hindi\000Hiri Motu\000Cro" + "atian\000Haitian; Haitian Creole\000Hungarian\000Armenian\000Herero\000In" + "terlingua (International Auxiliary Language Association)\000Indonesian" + "\000Interlingue; Occidental\000Igbo\000Sichuan Yi; Nuosu\000Inupiaq\000Id" + "o\000Icelandic\000Italian\000Inuktitut\000Japanese\000Javanese\000Georgia" + "n\000Kongo\000Kikuyu; 
Gikuyu\000Kuanyama; Kwanyama\000Kazakh\000Kalaallis" + "ut; Greenlandic\000Central Khmer\000Kannada\000Korean\000Kanuri\000Kashmi" + "ri\000Kurdish\000Komi\000Cornish\000Kirghiz; Kyrgyz\000Latin\000Luxembour" + "gish; Letzeburgesch\000Ganda\000Limburgan; Limburger; Limburgish\000Linga" + "la\000Lao\000Lithuanian\000Luba-Katanga\000Latvian\000Malagasy\000Marshal" + "lese\000Maori\000Macedonian\000Malayalam\000Mongolian\000Moldavian; Moldo" + "van\000Marathi\000Malay\000Maltese\000Burmese\000Nauru\000Bokm\303\245l, " + "Norwegian; Norwegian Bokm\303\245l\000Ndebele, North; North Ndebele\000Ne" + "pali\000Ndonga\000Dutch; Flemish\000Norwegian Nynorsk; Nynorsk, Norwegian" + "\000Norwegian\000Ndebele, South; South Ndebele\000Navajo; Navaho\000Chich" + "ewa; Chewa; Nyanja\000Occitan (post 1500)\000Ojibwa\000Oromo\000Oriya\000" + "Ossetian; Ossetic\000Panjabi; Punjabi\000Pali\000Polish\000Pushto; Pashto" + "\000Portuguese\000Quechua\000Romansh\000Rundi\000Romanian\000Russian\000K" + "inyarwanda\000Sanskrit\000Sardinian\000Sindhi\000Northern Sami\000Sango" + "\000Sinhala; Sinhalese\000Slovak\000Slovenian\000Samoan\000Shona\000Somal" + "i\000Albanian\000Serbian\000Swati\000Sotho, Southern\000Sundanese\000Swed" + "ish\000Swahili\000Tamil\000Telugu\000Tajik\000Thai\000Tigrinya\000Turkmen" + "\000Tagalog\000Tswana\000Tonga (Tonga Islands)\000Turkish\000Tsonga\000Ta" + "tar\000Twi\000Tahitian\000Uighur; Uyghur\000Ukrainian\000Urdu\000Uzbek" + "\000Venda\000Vietnamese\000Volap\303\274k\000Walloon\000Wolof\000Xhosa" + "\000Yiddish\000Yoruba\000Zhuang; Chuang\000Chinese\000Zulu"; + +/* *INDENT-ON* */ diff --git a/gst-libs/gst/tag/lang.c b/gst-libs/gst/tag/lang.c new file mode 100644 index 0000000000..1a7de69089 --- /dev/null +++ b/gst-libs/gst/tag/lang.c @@ -0,0 +1,491 @@ +/* GStreamer language codes and names utility functions + * Copyright (C) 2009 Tim-Philipp Müller + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU 
Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/** + * SECTION:gsttaglanguagecodes + * @short_description: mappings for ISO-639 language codes and names + * @see_also: #GstTagList + * + * <refsect2> + * <para> + * Provides helper functions to convert between the various ISO-639 language + * codes, and to map language codes to language names. + * </para> + * </refsect2> + */ + +/* FIXME 0.11: maybe switch to ISO-639-2 everywhere incl. GST_TAG_LANGUAGE? 
*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#undef GETTEXT_PACKAGE +#define GETTEXT_PACKAGE "iso_639" + +#define ISO_639_XML_PATH ISO_CODES_PREFIX "/share/xml/iso-codes/iso_639.xml" +#define ISO_CODES_LOCALEDIR ISO_CODES_PREFIX "/share/locale" + +#include +#include +#include +#include + +#include "lang-tables.c" + +#ifndef GST_DISABLE_GST_DEBUG + +#define GST_CAT_DEFAULT ensure_debug_category() + +static GstDebugCategory * +ensure_debug_category (void) +{ + static gsize cat_gonce = 0; + + if (g_once_init_enter (&cat_gonce)) { + gsize cat_done; + + cat_done = (gsize) _gst_debug_category_new ("tag-langcodes", 0, + "GstTag language codes and names"); + + g_once_init_leave (&cat_gonce, cat_done); + } + + return (GstDebugCategory *) cat_gonce; +} + +#else + +#define ensure_debug_category() /* NOOP */ + +#endif /* GST_DISABLE_GST_DEBUG */ + +/* ------------------------------------------------------------------------- */ + +/* Loading and initing */ + +#if defined(HAVE_ISO_CODES) +static const gchar * +get_val (const gchar ** names, const gchar ** vals, const gchar * name) +{ + while (names != NULL && *names != NULL) { + if (strcmp (*names, name) == 0) + return *vals; + ++names; + ++vals; + } + return NULL; +} + +static void +parse_start_element (GMarkupParseContext * ctx, const gchar * element_name, + const gchar ** attr_names, const gchar ** attr_vals, + gpointer user_data, GError ** error) +{ + GHashTable *ht = (GHashTable *) user_data; + const gchar *c1, *c2t, *c2b, *name, *tname; + + if (strcmp (element_name, "iso_639_entry") != 0) + return; + + c1 = get_val (attr_names, attr_vals, "iso_639_1_code"); + + /* only interested in languages with an ISO 639-1 code for now */ + if (c1 == NULL) + return; + + c2t = get_val (attr_names, attr_vals, "iso_639_2T_code"); + c2b = get_val (attr_names, attr_vals, "iso_639_2B_code"); + name = get_val (attr_names, attr_vals, "name"); + + if (c2t == NULL || c2b == NULL || name == NULL) { + GST_WARNING ("broken iso_639.xml 
entry: c2t=%p, c2b=%p, name=%p", c2t, + c2b, name); + return; + } + + /* translate language name */ + tname = _(name); + + /* if no translation was found, it will return the input string, which we + * we don't want to put into the hash table because it will be freed again */ + if (G_UNLIKELY (tname == name)) + tname = g_intern_string (name); + + /* now overwrite default/fallback mappings with names in locale language */ + g_hash_table_replace (ht, (gpointer) g_intern_string (c1), (gpointer) tname); + g_hash_table_replace (ht, (gpointer) g_intern_string (c2b), (gpointer) tname); + if (strcmp (c2t, c2b) != 0) { + g_hash_table_replace (ht, (gpointer) g_intern_string (c2t), + (gpointer) tname); + } + + GST_LOG ("%s %s %s : %s - %s", c1, c2t, c2b, name, tname); +} + +static void +gst_tag_load_iso_639_xml (GHashTable * ht) +{ + GMappedFile *f; + GError *err = NULL; + gchar *xml_data; + gsize xml_len; + +#ifdef ENABLE_NLS + GST_DEBUG ("binding text domain %s to locale dir %s", GETTEXT_PACKAGE, + ISO_CODES_LOCALEDIR); + bindtextdomain (GETTEXT_PACKAGE, ISO_CODES_LOCALEDIR); + bind_textdomain_codeset (GETTEXT_PACKAGE, "UTF-8"); +#endif + + f = g_mapped_file_new (ISO_639_XML_PATH, FALSE, NULL); + if (f != NULL) { + xml_data = (gchar *) g_mapped_file_get_contents (f); + xml_len = g_mapped_file_get_length (f); + } else { + if (!g_file_get_contents (ISO_639_XML_PATH, &xml_data, &xml_len, &err)) { + GST_WARNING ("Could not read %s: %s", ISO_639_XML_PATH, err->message); + g_error_free (err); + return; + } + } + + if (g_utf8_validate (xml_data, xml_len, NULL)) { + GMarkupParser xml_parser = { parse_start_element, NULL, NULL, NULL, NULL }; + GMarkupParseContext *ctx; + + ctx = g_markup_parse_context_new (&xml_parser, 0, ht, NULL); + if (!g_markup_parse_context_parse (ctx, xml_data, xml_len, &err)) { + GST_WARNING ("Parsing iso_639.xml failed: %s", err->message); + g_error_free (err); + } + g_markup_parse_context_free (ctx); + } else { + GST_WARNING ("iso_639.xml file is not valid 
UTF-8"); + GST_MEMDUMP ("iso_639.xml file", (guint8 *) xml_data, xml_len); + } + + /* ... and clean up */ + if (f != NULL) + g_mapped_file_unref (f); + else + g_free (xml_data); +} +#endif /* HAVE_ISO_CODES */ + +static GHashTable * +gst_tag_get_iso_639_ht (void) +{ + static gsize once_val = 0; + int i; + + if (g_once_init_enter (&once_val)) { + GHashTable *ht; + gsize done_val; + + GST_MEMDUMP ("iso 639 language names (internal default/fallback)", + (guint8 *) iso_639_names, sizeof (iso_639_names)); + + /* maps code -> language name; all strings are either interned strings + * or const static strings from lang-table.c */ + ht = g_hash_table_new (g_str_hash, g_str_equal); + + /* set up default/fallback mappings */ + for (i = 0; i < G_N_ELEMENTS (iso_639_codes); ++i) { + GST_LOG ("%3d %s %s %c%c 0x%04x %s", i, iso_639_codes[i].iso_639_1, + iso_639_codes[i].iso_639_2, + ((iso_639_codes[i].flags & ISO_639_FLAG_2B)) ? 'B' : '.', + ((iso_639_codes[i].flags & ISO_639_FLAG_2T)) ? 'T' : '.', + iso_639_codes[i].name_offset, + iso_639_names + iso_639_codes[i].name_offset); + +#ifdef HAVE_ISO_CODES + /* intern these in order to minimise allocations when interning strings + * read from the xml file later */ + g_intern_static_string (iso_639_codes[i].iso_639_1); + g_intern_static_string (iso_639_codes[i].iso_639_2); + g_intern_static_string (iso_639_names + iso_639_codes[i].name_offset); +#endif + + /* and add default mapping (these strings are always valid) */ + g_hash_table_insert (ht, (gpointer) iso_639_codes[i].iso_639_1, + (gpointer) (iso_639_names + iso_639_codes[i].name_offset)); + g_hash_table_insert (ht, (gpointer) iso_639_codes[i].iso_639_2, + (gpointer) (iso_639_names + iso_639_codes[i].name_offset)); + } + +#ifdef HAVE_ISO_CODES + { + GstClockTime ts = gst_util_get_timestamp (); + + gst_tag_load_iso_639_xml (ht); + + ts = gst_util_get_timestamp () - ts; + GST_INFO ("iso_639.xml loading took %.2gms", (double) ts / GST_MSECOND); + } +#else + GST_INFO ("iso-codes 
disabled or not available"); +#endif + + done_val = (gsize) ht; + g_once_init_leave (&once_val, done_val); + } + + return (GHashTable *) once_val; +} + +/* ------------------------------------------------------------------------- */ + +static int +qsort_strcmp_func (const void *p1, const void *p2) +{ + return strcmp (*(char *const *) p1, *(char *const *) p2); +} + +/** + * gst_tag_get_language_codes: + * + * Returns a list of known language codes (in form of two-letter ISO-639-1 + * codes). This is useful for UIs to build a list of available languages for + * tagging purposes (e.g. to tag an audio track appropriately in a video or + * audio editor). + * + * Returns: NULL-terminated string array with two-letter language codes. Free + * with g_strfreev() when no longer needed. + * + * Since: 0.10.26 + */ +gchar ** +gst_tag_get_language_codes (void) +{ + GHashTableIter iter; + GHashTable *ht; + gpointer key; + gchar **codes; + int i; + + ensure_debug_category (); + + ht = gst_tag_get_iso_639_ht (); + + /* we have at least two keys for each language (-1 code and -2 code) */ + codes = g_new (gchar *, (g_hash_table_size (ht) / 2) + 1); + + i = 0; + g_hash_table_iter_init (&iter, ht); + while (g_hash_table_iter_next (&iter, &key, NULL)) { + const gchar *lang_code = key; + + if (strlen (lang_code) == 2) { + codes[i] = g_strdup (lang_code); + ++i; + } + } + codes[i] = NULL; + + /* be nice and sort the list */ + qsort (&codes[0], i, sizeof (gchar *), qsort_strcmp_func); + + return codes; +} + +/** + * gst_tag_get_language_name: + * @language_code: two or three-letter ISO-639 language code + * + * Returns the name of the language given an ISO-639 language code, such + * as often found in a GST_TAG_LANGUAGE tag. The name will be translated + * according to the current locale (if the library was built against the + * iso-codes package), otherwise the English name will be returned. + * + * Language codes are case-sensitive and expected to be lower case. 
+ * + * Returns: language name in UTF-8 format. The returned string must not be + * modified and does not need to be freed; it will stay valid until the + * application is terminated. + * + * Since: 0.10.26 + */ +const gchar * +gst_tag_get_language_name (const gchar * language_code) +{ + const gchar *lang_name; + GHashTable *ht; + + g_return_val_if_fail (language_code != NULL, NULL); + + ensure_debug_category (); + + ht = gst_tag_get_iso_639_ht (); + + lang_name = g_hash_table_lookup (ht, (gpointer) language_code); + GST_LOG ("%s -> %s", language_code, GST_STR_NULL (lang_name)); + + return lang_name; +} + +/** + * gst_tag_get_language_code_iso_639_1: + * @lang_code: ISO-639 language code (e.g. "deu" or "ger" or "de") + * + * Returns two-letter ISO-639-1 language code given a three-letter ISO-639-2 + * language code or two-letter ISO-639-1 language code (both are accepted for + * convenience). + * + * Language codes are case-sensitive and expected to be lower case. + * + * Returns: two-letter ISO-639-1 language code string that maps to @lang_code, + * or NULL if no mapping is known. The returned string must not be + * modified or freed. + * + * Since: 0.10.26 + */ +const gchar * +gst_tag_get_language_code_iso_639_1 (const gchar * lang_code) +{ + const gchar *c = NULL; + int i; + + g_return_val_if_fail (lang_code != NULL, NULL); + + ensure_debug_category (); + + /* FIXME: we are being a bit inconsistent here in the sense that we will only + * map the language codes from our static table. Theoretically the iso-codes + * XML file might have had additional codes that are now in the hash table. + * We keep it simple for now and don't waste memory on additional tables. 
*/ + for (i = 0; i < G_N_ELEMENTS (iso_639_codes); ++i) { + /* we check both codes here, so function can be used in a more versatile + * way, to convert a language tag to a two-letter language code and/or + * verify an existing code */ + if (strcmp (lang_code, iso_639_codes[i].iso_639_1) == 0 || + strcmp (lang_code, iso_639_codes[i].iso_639_2) == 0) { + c = iso_639_codes[i].iso_639_1; + break; + } + } + + GST_LOG ("%s -> %s", lang_code, GST_STR_NULL (c)); + + return c; +} + +static const gchar * +gst_tag_get_language_code_iso_639_2X (const gchar * lang_code, guint8 flags) +{ + int i; + + /* FIXME: we are being a bit inconsistent here in the sense that we will only + * map the language codes from our static table. Theoretically the iso-codes + * XML file might have had additional codes that are now in the hash table. + * We keep it simple for now and don't waste memory on additional tables. + * Also, we currently only parse the iso_639.xml file if language names or + * a list of all codes is requested, and it'd be nice to keep it like that. 
*/ + for (i = 0; i < G_N_ELEMENTS (iso_639_codes); ++i) { + /* accept both -1 and -2 codes, to convert or verify a code; if the + * matching row lacks the requested flag, the 2T/2B twin of a language is + * the adjacent row sharing its name_offset (bounds-checked below) */ + if (strcmp (lang_code, iso_639_codes[i].iso_639_1) == 0 || + strcmp (lang_code, iso_639_codes[i].iso_639_2) == 0) { + if ((iso_639_codes[i].flags & flags) == flags) { + return iso_639_codes[i].iso_639_2; + } else if (i > 0 && (iso_639_codes[i - 1].flags & flags) == flags && + iso_639_codes[i].name_offset == iso_639_codes[i - 1].name_offset) { + return iso_639_codes[i - 1].iso_639_2; + } else if (i + 1 < G_N_ELEMENTS (iso_639_codes) && + (iso_639_codes[i + 1].flags & flags) == flags && + iso_639_codes[i].name_offset == iso_639_codes[i + 1].name_offset) { + return iso_639_codes[i + 1].iso_639_2; + } + } + } + return NULL; +} + +/** + * gst_tag_get_language_code_iso_639_2T: + * @lang_code: ISO-639 language code (e.g. "deu" or "ger" or "de") + * + * Returns three-letter ISO-639-2 "terminological" language code given a + * two-letter ISO-639-1 language code or a three-letter ISO-639-2 language + * code (both are accepted for convenience). + * + * The "terminological" code is derived from the local name of the language + * (e.g. "deu" for German instead of "ger"). In most scenarios, the + * "terminological" codes are preferred over the "bibliographic" ones. + * + * Language codes are case-sensitive and expected to be lower case. + * + * Returns: three-letter ISO-639-2 language code string that maps to @lang_code, + * or NULL if no mapping is known. The returned string must not be + * modified or freed. 
+ * + * Since: 0.10.26 + */ +const gchar * +gst_tag_get_language_code_iso_639_2T (const gchar * lang_code) +{ + const gchar *c; + + g_return_val_if_fail (lang_code != NULL, NULL); + + ensure_debug_category (); + + c = gst_tag_get_language_code_iso_639_2X (lang_code, ISO_639_FLAG_2T); + + GST_LOG ("%s -> %s", lang_code, GST_STR_NULL (c)); + + return c; +} + +/** + * gst_tag_get_language_code_iso_639_2B: + * @lang_code: ISO-639 language code (e.g. "deu" or "ger" or "de") + * + * Returns three-letter ISO-639-2 "bibliographic" language code given a + * two-letter ISO-639-1 language code or a three-letter ISO-639-2 language + * code (both are accepted for convenience). + * + * The "bibliographic" code is derived from the English name of the language + * (e.g. "ger" for German instead of "de" or "deu"). In most scenarios, the + * "terminological" codes are preferred. + * + * Language codes are case-sensitive and expected to be lower case. + * + * Returns: three-letter ISO-639-2 language code string that maps to @lang_code, + * or NULL if no mapping is known. The returned string must not be + * modified or freed. 
+ * + * Since: 0.10.26 + */ +const gchar * +gst_tag_get_language_code_iso_639_2B (const gchar * lang_code) +{ + const gchar *c; + + g_return_val_if_fail (lang_code != NULL, NULL); + + ensure_debug_category (); + + c = gst_tag_get_language_code_iso_639_2X (lang_code, ISO_639_FLAG_2B); + + GST_LOG ("%s -> %s", lang_code, GST_STR_NULL (c)); + + return c; +} diff --git a/gst-libs/gst/tag/mklangtables.c b/gst-libs/gst/tag/mklangtables.c new file mode 100644 index 0000000000..ab4e56b806 --- /dev/null +++ b/gst-libs/gst/tag/mklangtables.c @@ -0,0 +1,239 @@ +/* GStreamer Language Tag Utility Functions + * Copyright (C) 2009 Tim-Philipp Müller + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. + */ + +/* mklangtables.c: + * little program that reads iso_639.xml and outputs tables for us as fallback + * for when iso-codes are not available or we fail to read the file for some + * reason, and so we don't have to parse the xml file just to map codes. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <glib.h> +#include <string.h> + +#define ISO_639_XML_PATH ISO_CODES_PREFIX "/share/xml/iso-codes/iso_639.xml" + +typedef struct +{ + gchar code_1[3]; /* de */ + gchar code_2t[4]; /* deu */ + gchar code_2b[4]; /* ger */ + const gchar *name; /* German */ + guint name_offset; /* offset into string table */ +} IsoLang; + +static GArray *languages = NULL; + +static void +dump_languages (void) +{ + GString *names; + const char *s; + int i, num_escaped; + + g_assert (languages != NULL); + + names = g_string_new (""); + + g_print ("/* generated by " __FILE__ " iso-codes " ISO_CODES_VERSION " */\n"); + g_print ("\n"); + g_print ("#include <glib.h>\n"); + g_print ("\n"); + g_print ("#define ISO_639_FLAG_2T (1 << 0)\n"); + g_print ("#define ISO_639_FLAG_2B (1 << 1)\n"); + g_print ("\n"); + g_print ("/* *INDENT-OFF* */\n"); + g_print ("\n"); + g_print ("static const struct\n"); + g_print ("{\n"); + g_print (" const gchar iso_639_1[3];\n"); + g_print (" const gchar iso_639_2[4];\n"); + g_print (" guint8 flags;\n"); + g_print (" guint16 name_offset;\n"); + g_print ("} iso_639_codes[] = {\n"); + + for (i = 0, num_escaped = 0; i < languages->len; ++i) { + IsoLang *lang = &g_array_index (languages, IsoLang, i); + + /* For now just print those where there's both a ISO-639-1 and -2 code */ + if (lang->code_1[0] == '\0') + continue; + + /* save current offset */ + lang->name_offset = names->len; + + /* adjust for fact that \000 is 4 chars now but will take up only 1 later */ + lang->name_offset -= num_escaped * 3; + + /* append one char at a time, making sure to escape UTF-8 characters */ + for (s = lang->name; s != NULL && *s != '\0'; ++s) { + if (g_ascii_isprint (*s) && *s != '"' && *s != '\\') { + g_string_append_c (names, *s); + } else { + g_string_append_printf (names, "\\%03o", (unsigned char) *s); + ++num_escaped; + } + } + g_string_append (names, "\\000"); + ++num_escaped; + + g_print (" /* %s */\n", lang->name); + if (strcmp 
(lang->code_2b, lang->code_2t) == 0) { + g_print (" { \"%s\", \"%s\", ISO_639_FLAG_2T | ISO_639_FLAG_2B, %u },\n", + lang->code_1, lang->code_2t, lang->name_offset); + } else { + /* if 639-2T and 639-2B differ, put 639-2T first */ + g_print (" { \"%s\", \"%s\", ISO_639_FLAG_2T, %u },\n", + lang->code_1, lang->code_2t, lang->name_offset); + g_print (" { \"%s\", \"%s\", ISO_639_FLAG_2B, %u },\n", + lang->code_1, lang->code_2b, lang->name_offset); + } + } + + g_print ("};\n"); + g_print ("\n"); + g_print ("const gchar iso_639_names[] =\n"); + s = names->str; + while (s != NULL && *s != '\0') { + gchar line[74], *lastesc; + guint left; + + left = strlen (s); + g_strlcpy (line, s, MIN (left, sizeof (line))); + s += sizeof (line) - 1; + /* avoid partial escaped codes at the end of a line */ + if ((lastesc = strrchr (line, '\\')) && strlen (lastesc) < 4) { + s -= strlen (lastesc); + *lastesc = '\0'; + } + g_print (" \"%s\"", line); + if (left < 74) + break; + g_print ("\n"); + } + g_print (";\n"); + g_print ("\n"); + g_print ("/* *INDENT-ON* */\n"); + + g_string_free (names, TRUE); +} + +static gboolean +copy_attribute (gchar * dest, guint dest_len, const gchar ** attr_names, + const gchar ** attr_vals, const gchar * needle) +{ + while (attr_names != NULL && *attr_names != NULL) { + if (strcmp (*attr_names, needle) == 0) { + g_strlcpy (dest, *attr_vals, dest_len); + return TRUE; + } + ++attr_names; + ++attr_vals; + } + dest[0] = '\0'; + return FALSE; +} + +static void +xml_start_element (GMarkupParseContext * ctx, const gchar * element_name, + const gchar ** attr_names, const gchar ** attr_vals, + gpointer user_data, GError ** error) +{ + gchar name[256]; + IsoLang lang; + + if (strcmp (element_name, "iso_639_entry") != 0) + return; + + copy_attribute (lang.code_1, 3, attr_names, attr_vals, "iso_639_1_code"); + copy_attribute (lang.code_2t, 4, attr_names, attr_vals, "iso_639_2T_code"); + copy_attribute (lang.code_2b, 4, attr_names, attr_vals, "iso_639_2B_code"); + + 
copy_attribute (name, sizeof (name), attr_names, attr_vals, "name"); + lang.name = g_intern_string (name); + + g_array_append_val (languages, lang); +} + +static void +parse_iso_639_xml (const gchar * data, gsize len) +{ + GMarkupParser xml_parser = { xml_start_element, NULL, NULL, NULL, NULL }; + GMarkupParseContext *ctx; + GError *err = NULL; + + g_return_if_fail (g_utf8_validate (data, len, NULL)); + + ctx = g_markup_parse_context_new (&xml_parser, 0, NULL, NULL); + if (!g_markup_parse_context_parse (ctx, data, len, &err)) + g_error ("Parsing failed: %s", err->message); + + g_markup_parse_context_free (ctx); +} + +static gint +languages_sort_func (IsoLang * l1, IsoLang * l2) +{ + if (l1 == l2) + return 0; + + if (l1->code_1[0] == '\0' && l2->code_1[0] != '\0') + return -1; + + return strcmp (l1->code_1, l2->code_1); +} + +int +main (int argc, char **argv) +{ + GMappedFile *f; + gchar *xml_data; + gsize xml_len; + + f = g_mapped_file_new (ISO_639_XML_PATH, FALSE, NULL); + if (f != NULL) { + xml_data = (gchar *) g_mapped_file_get_contents (f); + xml_len = g_mapped_file_get_length (f); + } else { + GError *err = NULL; + + if (!g_file_get_contents (ISO_639_XML_PATH, &xml_data, &xml_len, &err)) + g_error ("Could not read %s: %s", ISO_639_XML_PATH, err->message); + } + + languages = g_array_new (FALSE, TRUE, sizeof (IsoLang)); + + parse_iso_639_xml (xml_data, xml_len); + + g_array_sort (languages, (GCompareFunc) languages_sort_func); + + dump_languages (); + + g_array_free (languages, TRUE); + + if (f != NULL) + g_mapped_file_unref (f); + else + g_free (xml_data); + + return 0; +} diff --git a/gst-libs/gst/tag/tag.h b/gst-libs/gst/tag/tag.h index 238431e7ff..0a439c045d 100644 --- a/gst-libs/gst/tag/tag.h +++ b/gst-libs/gst/tag/tag.h @@ -238,6 +238,30 @@ GstBuffer * gst_tag_image_data_to_image_buffer (const guint8 * ima /* FIXME 0.11: replace with a more general gst_tag_library_init() */ void gst_tag_register_musicbrainz_tags (void); + +/* language tag related 
functions */ + +gchar ** gst_tag_get_language_codes (void); + +const gchar * gst_tag_get_language_name (const gchar * language_code); + +const gchar * gst_tag_get_language_code_iso_639_1 (const gchar * lang_code); + +const gchar * gst_tag_get_language_code_iso_639_2B (const gchar * lang_code); + +const gchar * gst_tag_get_language_code_iso_639_2T (const gchar * lang_code); + +/** + * gst_tag_get_language_code: + * @lang_code: ISO-639 language code (e.g. "deu" or "ger" or "de") + * + * Convenience macro wrapping gst_tag_get_language_code_iso_639_1(). + * + * Since: 0.10.26 + */ +#define gst_tag_get_language_code(lang_code) \ + gst_tag_get_language_code_iso_639_1(lang_code) + G_END_DECLS #endif /* __GST_TAG_TAG_H__ */ diff --git a/tests/check/libs/tag.c b/tests/check/libs/tag.c index 014be4dfe8..2ed73ff1f6 100644 --- a/tests/check/libs/tag.c +++ b/tests/check/libs/tag.c @@ -2,7 +2,7 @@ * * unit tests for the tag support library * - * Copyright (C) 2006 Tim-Philipp Müller + * Copyright (C) 2006-2009 Tim-Philipp Müller * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public @@ -682,6 +682,56 @@ GST_START_TEST (test_id3v1_utf8_tag) GST_END_TEST; +GST_START_TEST (test_language_utils) +{ + gchar **lang_codes, **c; + + lang_codes = gst_tag_get_language_codes (); + fail_unless (lang_codes != NULL); + fail_unless (*lang_codes != NULL); + + for (c = lang_codes; c != NULL && *c != NULL; ++c) { + const gchar *lang_name; + + lang_name = gst_tag_get_language_name (*c); + GST_DEBUG ("[%s] %s\n", *c, GST_STR_NULL (lang_name)); + + fail_unless (lang_name != NULL); + fail_unless (g_utf8_validate (lang_name, -1, NULL)); + } + g_strfreev (lang_codes); + + fail_unless (gst_tag_get_language_name ("de") != NULL); + fail_unless (gst_tag_get_language_name ("deu") != NULL); + fail_unless (gst_tag_get_language_name ("ger") != NULL); + fail_unless_equals_string (gst_tag_get_language_name ("deu"), + 
gst_tag_get_language_name ("ger")); + fail_unless_equals_string (gst_tag_get_language_name ("de"), + gst_tag_get_language_name ("ger")); + fail_unless (gst_tag_get_language_name ("de") != + gst_tag_get_language_name ("fr")); + +#define ASSERT_STRINGS_EQUAL fail_unless_equals_string + + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code ("deu"), "de"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code ("de"), "de"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code ("ger"), "de"); + + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_1 ("deu"), "de"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_1 ("de"), "de"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_1 ("ger"), "de"); + + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_2T ("de"), "deu"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_2T ("deu"), "deu"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_2T ("ger"), "deu"); + + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_2B ("de"), "ger"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_2B ("deu"), "ger"); + ASSERT_STRINGS_EQUAL (gst_tag_get_language_code_iso_639_2B ("ger"), "ger"); +} + +GST_END_TEST; + static Suite * tag_suite (void) { @@ -694,6 +744,7 @@ tag_suite (void) tcase_add_test (tc_chain, test_vorbis_tags); tcase_add_test (tc_chain, test_id3_tags); tcase_add_test (tc_chain, test_id3v1_utf8_tag); + tcase_add_test (tc_chain, test_language_utils); return s; } diff --git a/win32/common/libgsttag.def b/win32/common/libgsttag.def index cad0d6f8c9..cbdc44990b 100644 --- a/win32/common/libgsttag.def +++ b/win32/common/libgsttag.def @@ -5,6 +5,11 @@ EXPORTS gst_tag_from_id3_tag gst_tag_from_id3_user_tag gst_tag_from_vorbis_tag + gst_tag_get_language_code_iso_639_1 + gst_tag_get_language_code_iso_639_2B + gst_tag_get_language_code_iso_639_2T + gst_tag_get_language_codes + gst_tag_get_language_name gst_tag_id3_genre_count gst_tag_id3_genre_get gst_tag_image_data_to_image_buffer