diff --git a/subprojects/gst-plugins-base/gst/subparse/samiparse.c b/subprojects/gst-plugins-base/gst/subparse/samiparse.c index d9df46442f..313693d159 100644 --- a/subprojects/gst-plugins-base/gst/subparse/samiparse.c +++ b/subprojects/gst-plugins-base/gst/subparse/samiparse.c @@ -88,272 +88,273 @@ html_context_free (HtmlContext * ctxt) g_free (ctxt); } -struct EntityMap +typedef struct { - const gunichar unescaped; - const gchar *escaped; + gunichar unescaped:24; + guint8 escaped_len; + gchar escaped[8]; +} EntityMap; + +#define ENTITY(unicode,ent) unicode,sizeof(ent)-1,ent + +static const EntityMap XmlEntities[] = { + {ENTITY (34, "quot")}, + {ENTITY (38, "amp")}, + {ENTITY (39, "apos")}, + {ENTITY (60, "lt")}, + {ENTITY (62, "gt")}, }; -struct EntityMap XmlEntities[] = { - {34, "quot;"}, - {38, "amp;"}, - {39, "apos;"}, - {60, "lt;"}, - {62, "gt;"}, - {0, NULL}, -}; - -struct EntityMap HtmlEntities[] = { -/* nbsp will handle manually +static const EntityMap HtmlEntities[] = { +/* nbsp we'll handle manually { 160, "nbsp;" }, */ - {161, "iexcl;"}, - {162, "cent;"}, - {163, "pound;"}, - {164, "curren;"}, - {165, "yen;"}, - {166, "brvbar;"}, - {167, "sect;"}, - {168, "uml;"}, - {169, "copy;"}, - {170, "ordf;"}, - {171, "laquo;"}, - {172, "not;"}, - {173, "shy;"}, - {174, "reg;"}, - {175, "macr;"}, - {176, "deg;"}, - {177, "plusmn;"}, - {178, "sup2;"}, - {179, "sup3;"}, - {180, "acute;"}, - {181, "micro;"}, - {182, "para;"}, - {183, "middot;"}, - {184, "cedil;"}, - {185, "sup1;"}, - {186, "ordm;"}, - {187, "raquo;"}, - {188, "frac14;"}, - {189, "frac12;"}, - {190, "frac34;"}, - {191, "iquest;"}, - {192, "Agrave;"}, - {193, "Aacute;"}, - {194, "Acirc;"}, - {195, "Atilde;"}, - {196, "Auml;"}, - {197, "Aring;"}, - {198, "AElig;"}, - {199, "Ccedil;"}, - {200, "Egrave;"}, - {201, "Eacute;"}, - {202, "Ecirc;"}, - {203, "Euml;"}, - {204, "Igrave;"}, - {205, "Iacute;"}, - {206, "Icirc;"}, - {207, "Iuml;"}, - {208, "ETH;"}, - {209, "Ntilde;"}, - {210, "Ograve;"}, - {211, "Oacute;"}, - {212, "Ocirc;"}, - {213, "Otilde;"}, - {214, "Ouml;"}, - {215, "times;"}, - {216, "Oslash;"}, - {217, "Ugrave;"}, - {218, "Uacute;"}, - {219, "Ucirc;"}, - {220, "Uuml;"}, - {221, "Yacute;"}, - {222, "THORN;"}, - {223, "szlig;"}, - {224, "agrave;"}, - {225, "aacute;"}, - {226, "acirc;"}, - {227, "atilde;"}, - {228, "auml;"}, - {229, "aring;"}, - {230, "aelig;"}, - {231, "ccedil;"}, - {232, "egrave;"}, - {233, "eacute;"}, - {234, "ecirc;"}, - {235, "euml;"}, - {236, "igrave;"}, - {237, "iacute;"}, - {238, "icirc;"}, - {239, "iuml;"}, - {240, "eth;"}, - {241, "ntilde;"}, - {242, "ograve;"}, - {243, "oacute;"}, - {244, "ocirc;"}, - {245, "otilde;"}, - {246, "ouml;"}, - {247, "divide;"}, - {248, "oslash;"}, - {249, "ugrave;"}, - {250, "uacute;"}, - {251, "ucirc;"}, - {252, "uuml;"}, - {253, "yacute;"}, - {254, "thorn;"}, - {255, "yuml;"}, - {338, "OElig;"}, - {339, "oelig;"}, - {352, "Scaron;"}, - {353, "scaron;"}, - {376, "Yuml;"}, - {402, "fnof;"}, - {710, "circ;"}, - {732, "tilde;"}, - {913, "Alpha;"}, - {914, "Beta;"}, - {915, "Gamma;"}, - {916, "Delta;"}, - {917, "Epsilon;"}, - {918, "Zeta;"}, - {919, "Eta;"}, - {920, "Theta;"}, - {921, "Iota;"}, - {922, "Kappa;"}, - {923, "Lambda;"}, - {924, "Mu;"}, - {925, "Nu;"}, - {926, "Xi;"}, - {927, "Omicron;"}, - {928, "Pi;"}, - {929, "Rho;"}, - {931, "Sigma;"}, - {932, "Tau;"}, - {933, "Upsilon;"}, - {934, "Phi;"}, - {935, "Chi;"}, - {936, "Psi;"}, - {937, "Omega;"}, - {945, "alpha;"}, - {946, "beta;"}, - {947, "gamma;"}, - {948, "delta;"}, - {949, "epsilon;"}, - {950, "zeta;"}, - {951, "eta;"}, - {952, "theta;"}, - {953, "iota;"}, - {954, "kappa;"}, - {955, "lambda;"}, - {956, "mu;"}, - {957, "nu;"}, - {958, "xi;"}, - {959, "omicron;"}, - {960, "pi;"}, - {961, "rho;"}, - {962, "sigmaf;"}, - {963, "sigma;"}, - {964, "tau;"}, - {965, "upsilon;"}, - {966, "phi;"}, - {967, "chi;"}, - {968, "psi;"}, - {969, "omega;"}, - {977, "thetasym;"}, - {978, "upsih;"}, - {982, "piv;"}, - {8194, "ensp;"}, - {8195, "emsp;"}, - {8201, "thinsp;"}, - {8204, "zwnj;"}, - {8205, "zwj;"}, - {8206, "lrm;"}, - {8207, "rlm;"}, - {8211, "ndash;"}, - {8212, "mdash;"}, - {8216, "lsquo;"}, - {8217, "rsquo;"}, - {8218, "sbquo;"}, - {8220, "ldquo;"}, - {8221, "rdquo;"}, - {8222, "bdquo;"}, - {8224, "dagger;"}, - {8225, "Dagger;"}, - {8226, "bull;"}, - {8230, "hellip;"}, - {8240, "permil;"}, - {8242, "prime;"}, - {8243, "Prime;"}, - {8249, "lsaquo;"}, - {8250, "rsaquo;"}, - {8254, "oline;"}, - {8260, "frasl;"}, - {8364, "euro;"}, - {8465, "image;"}, - {8472, "weierp;"}, - {8476, "real;"}, - {8482, "trade;"}, - {8501, "alefsym;"}, - {8592, "larr;"}, - {8593, "uarr;"}, - {8594, "rarr;"}, - {8595, "darr;"}, - {8596, "harr;"}, - {8629, "crarr;"}, - {8656, "lArr;"}, - {8657, "uArr;"}, - {8658, "rArr;"}, - {8659, "dArr;"}, - {8660, "hArr;"}, - {8704, "forall;"}, - {8706, "part;"}, - {8707, "exist;"}, - {8709, "empty;"}, - {8711, "nabla;"}, - {8712, "isin;"}, - {8713, "notin;"}, - {8715, "ni;"}, - {8719, "prod;"}, - {8721, "sum;"}, - {8722, "minus;"}, - {8727, "lowast;"}, - {8730, "radic;"}, - {8733, "prop;"}, - {8734, "infin;"}, - {8736, "ang;"}, - {8743, "and;"}, - {8744, "or;"}, - {8745, "cap;"}, - {8746, "cup;"}, - {8747, "int;"}, - {8756, "there4;"}, - {8764, "sim;"}, - {8773, "cong;"}, - {8776, "asymp;"}, - {8800, "ne;"}, - {8801, "equiv;"}, - {8804, "le;"}, - {8805, "ge;"}, - {8834, "sub;"}, - {8835, "sup;"}, - {8836, "nsub;"}, - {8838, "sube;"}, - {8839, "supe;"}, - {8853, "oplus;"}, - {8855, "otimes;"}, - {8869, "perp;"}, - {8901, "sdot;"}, - {8968, "lceil;"}, - {8969, "rceil;"}, - {8970, "lfloor;"}, - {8971, "rfloor;"}, - {9001, "lang;"}, - {9002, "rang;"}, - {9674, "loz;"}, - {9824, "spades;"}, - {9827, "clubs;"}, - {9829, "hearts;"}, - {9830, "diams;"}, - {0, NULL}, + {ENTITY (161, "iexcl")}, + {ENTITY (162, "cent")}, + {ENTITY (163, "pound")}, + {ENTITY (164, "curren")}, + {ENTITY (165, "yen")}, + {ENTITY (166, "brvbar")}, + {ENTITY (167, "sect")}, + {ENTITY (168, "uml")}, + {ENTITY (169, "copy")}, + {ENTITY (170, "ordf")}, + {ENTITY (171, "laquo")}, + {ENTITY (172, "not")}, + {ENTITY (173, "shy")}, + {ENTITY (174, "reg")}, + {ENTITY (175, "macr")}, + {ENTITY (176, "deg")}, + {ENTITY (177, "plusmn")}, + {ENTITY (178, "sup2")}, + {ENTITY (179, "sup3")}, + {ENTITY (180, "acute")}, + {ENTITY (181, "micro")}, + {ENTITY (182, "para")}, + {ENTITY (183, "middot")}, + {ENTITY (184, "cedil")}, + {ENTITY (185, "sup1")}, + {ENTITY (186, "ordm")}, + {ENTITY (187, "raquo")}, + {ENTITY (188, "frac14")}, + {ENTITY (189, "frac12")}, + {ENTITY (190, "frac34")}, + {ENTITY (191, "iquest")}, + {ENTITY (192, "Agrave")}, + {ENTITY (193, "Aacute")}, + {ENTITY (194, "Acirc")}, + {ENTITY (195, "Atilde")}, + {ENTITY (196, "Auml")}, + {ENTITY (197, "Aring")}, + {ENTITY (198, "AElig")}, + {ENTITY (199, "Ccedil")}, + {ENTITY (200, "Egrave")}, + {ENTITY (201, "Eacute")}, + {ENTITY (202, "Ecirc")}, + {ENTITY (203, "Euml")}, + {ENTITY (204, "Igrave")}, + {ENTITY (205, "Iacute")}, + {ENTITY (206, "Icirc")}, + {ENTITY (207, "Iuml")}, + {ENTITY (208, "ETH")}, + {ENTITY (209, "Ntilde")}, + {ENTITY (210, "Ograve")}, + {ENTITY (211, "Oacute")}, + {ENTITY (212, "Ocirc")}, + {ENTITY (213, "Otilde")}, + {ENTITY (214, "Ouml")}, + {ENTITY (215, "times")}, + {ENTITY (216, "Oslash")}, + {ENTITY (217, "Ugrave")}, + {ENTITY (218, "Uacute")}, + {ENTITY (219, "Ucirc")}, + {ENTITY (220, "Uuml")}, + {ENTITY (221, "Yacute")}, + {ENTITY (222, "THORN")}, + {ENTITY (223, "szlig")}, + {ENTITY (224, "agrave")}, + {ENTITY (225, "aacute")}, + {ENTITY (226, "acirc")}, + {ENTITY (227, "atilde")}, + {ENTITY (228, "auml")}, + {ENTITY (229, "aring")}, + {ENTITY (230, "aelig")}, + {ENTITY (231, "ccedil")}, + {ENTITY (232, "egrave")}, + {ENTITY (233, "eacute")}, + {ENTITY (234, "ecirc")}, + {ENTITY (235, "euml")}, + {ENTITY (236, "igrave")}, + {ENTITY (237, "iacute")}, + {ENTITY (238, "icirc")}, + {ENTITY (239, "iuml")}, + {ENTITY (240, "eth")}, + {ENTITY (241, "ntilde")}, + {ENTITY (242, "ograve")}, + {ENTITY (243, "oacute")}, + {ENTITY (244, "ocirc")}, + {ENTITY (245, "otilde")}, + {ENTITY (246, "ouml")}, + {ENTITY (247, "divide")}, + {ENTITY (248, "oslash")}, + {ENTITY (249, "ugrave")}, + {ENTITY (250, "uacute")}, + {ENTITY (251, "ucirc")}, + {ENTITY (252, "uuml")}, + {ENTITY (253, "yacute")}, + {ENTITY (254, "thorn")}, + {ENTITY (255, "yuml")}, + {ENTITY (338, "OElig")}, + {ENTITY (339, "oelig")}, + {ENTITY (352, "Scaron")}, + {ENTITY (353, "scaron")}, + {ENTITY (376, "Yuml")}, + {ENTITY (402, "fnof")}, + {ENTITY (710, "circ")}, + {ENTITY (732, "tilde")}, + {ENTITY (913, "Alpha")}, + {ENTITY (914, "Beta")}, + {ENTITY (915, "Gamma")}, + {ENTITY (916, "Delta")}, + {ENTITY (917, "Epsilon")}, + {ENTITY (918, "Zeta")}, + {ENTITY (919, "Eta")}, + {ENTITY (920, "Theta")}, + {ENTITY (921, "Iota")}, + {ENTITY (922, "Kappa")}, + {ENTITY (923, "Lambda")}, + {ENTITY (924, "Mu")}, + {ENTITY (925, "Nu")}, + {ENTITY (926, "Xi")}, + {ENTITY (927, "Omicron")}, + {ENTITY (928, "Pi")}, + {ENTITY (929, "Rho")}, + {ENTITY (931, "Sigma")}, + {ENTITY (932, "Tau")}, + {ENTITY (933, "Upsilon")}, + {ENTITY (934, "Phi")}, + {ENTITY (935, "Chi")}, + {ENTITY (936, "Psi")}, + {ENTITY (937, "Omega")}, + {ENTITY (945, "alpha")}, + {ENTITY (946, "beta")}, + {ENTITY (947, "gamma")}, + {ENTITY (948, "delta")}, + {ENTITY (949, "epsilon")}, + {ENTITY (950, "zeta")}, + {ENTITY (951, "eta")}, + {ENTITY (952, "theta")}, + {ENTITY (953, "iota")}, + {ENTITY (954, "kappa")}, + {ENTITY (955, "lambda")}, + {ENTITY (956, "mu")}, + {ENTITY (957, "nu")}, + {ENTITY (958, "xi")}, + {ENTITY (959, "omicron")}, + {ENTITY (960, "pi")}, + {ENTITY (961, "rho")}, + {ENTITY (962, "sigmaf")}, + {ENTITY (963, "sigma")}, + {ENTITY (964, "tau")}, + {ENTITY (965, "upsilon")}, + {ENTITY (966, "phi")}, + {ENTITY (967, "chi")}, + {ENTITY (968, "psi")}, + {ENTITY (969, "omega")}, + {ENTITY (977, "thetasym")}, + {ENTITY (978, "upsih")}, + {ENTITY (982, "piv")}, + {ENTITY (8194, "ensp")}, + {ENTITY (8195, "emsp")}, + {ENTITY (8201, "thinsp")}, + {ENTITY (8204, "zwnj")}, + {ENTITY (8205, "zwj")}, + {ENTITY (8206, "lrm")}, + {ENTITY (8207, "rlm")}, + {ENTITY (8211, "ndash")}, + {ENTITY (8212, "mdash")}, + {ENTITY (8216, "lsquo")}, + {ENTITY (8217, "rsquo")}, + {ENTITY (8218, "sbquo")}, + {ENTITY (8220, "ldquo")}, + {ENTITY (8221, "rdquo")}, + {ENTITY (8222, "bdquo")}, + {ENTITY (8224, "dagger")}, + {ENTITY (8225, "Dagger")}, + {ENTITY (8226, "bull")}, + {ENTITY (8230, "hellip")}, + {ENTITY (8240, "permil")}, + {ENTITY (8242, "prime")}, + {ENTITY (8243, "Prime")}, + {ENTITY (8249, "lsaquo")}, + {ENTITY (8250, "rsaquo")}, + {ENTITY (8254, "oline")}, + {ENTITY (8260, "frasl")}, + {ENTITY (8364, "euro")}, + {ENTITY (8465, "image")}, + {ENTITY (8472, "weierp")}, + {ENTITY (8476, "real")}, + {ENTITY (8482, "trade")}, + {ENTITY (8501, "alefsym")}, + {ENTITY (8592, "larr")}, + {ENTITY (8593, "uarr")}, + {ENTITY (8594, "rarr")}, + {ENTITY (8595, "darr")}, + {ENTITY (8596, "harr")}, + {ENTITY (8629, "crarr")}, + {ENTITY (8656, "lArr")}, + {ENTITY (8657, "uArr")}, + {ENTITY (8658, "rArr")}, + {ENTITY (8659, "dArr")}, + {ENTITY (8660, "hArr")}, + {ENTITY (8704, "forall")}, + {ENTITY (8706, "part")}, + {ENTITY (8707, "exist")}, + {ENTITY (8709, "empty")}, + {ENTITY (8711, "nabla")}, + {ENTITY (8712, "isin")}, + {ENTITY (8713, "notin")}, + {ENTITY (8715, "ni")}, + {ENTITY (8719, "prod")}, + {ENTITY (8721, "sum")}, + {ENTITY (8722, "minus")}, + {ENTITY (8727, "lowast")}, + {ENTITY (8730, "radic")}, + {ENTITY (8733, "prop")}, + {ENTITY (8734, "infin")}, + {ENTITY (8736, "ang")}, + {ENTITY (8743, "and")}, + {ENTITY (8744, "or")}, + {ENTITY (8745, "cap")}, + {ENTITY (8746, "cup")}, + {ENTITY (8747, "int")}, + {ENTITY (8756, "there4")}, + {ENTITY (8764, "sim")}, + {ENTITY (8773, "cong")}, + {ENTITY (8776, "asymp")}, + {ENTITY (8800, "ne")}, + {ENTITY (8801, "equiv")}, + {ENTITY (8804, "le")}, + {ENTITY (8805, "ge")}, + {ENTITY (8834, "sub")}, + {ENTITY (8835, "sup")}, + {ENTITY (8836, "nsub")}, + {ENTITY (8838, "sube")}, + {ENTITY (8839, "supe")}, + {ENTITY (8853, "oplus")}, + {ENTITY (8855, "otimes")}, + {ENTITY (8869, "perp")}, + {ENTITY (8901, "sdot")}, + {ENTITY (8968, "lceil")}, + {ENTITY (8969, "rceil")}, + {ENTITY (8970, "lfloor")}, + {ENTITY (8971, "rfloor")}, + {ENTITY (9001, "lang")}, + {ENTITY (9002, "rang")}, + {ENTITY (9674, "loz")}, + {ENTITY (9824, "spades")}, + {ENTITY (9827, "clubs")}, + {ENTITY (9829, "hearts")}, + {ENTITY (9830, "diams")}, }; static gchar * @@ -377,24 +378,30 @@ unescape_string (const gchar * text) } /* pass xml entities. these will be processed as pango markup */ - for (i = 0; XmlEntities[i].escaped; i++) { - gssize len = strlen (XmlEntities[i].escaped); - if (!g_ascii_strncasecmp (text, XmlEntities[i].escaped, len)) { + for (i = 0; i < G_N_ELEMENTS (XmlEntities); i++) { + const EntityMap *entity = &XmlEntities[i]; + guint8 escaped_len = entity->escaped_len; + + if (!g_ascii_strncasecmp (text, entity->escaped, escaped_len) + && text[escaped_len] == ';') { unescaped = g_string_append_c (unescaped, '&'); unescaped = - g_string_append_len (unescaped, XmlEntities[i].escaped, len); - text += len; + g_string_append_len (unescaped, entity->escaped, escaped_len); + unescaped = g_string_append_c (unescaped, ';'); + text += escaped_len + 1; goto next; } } /* convert html entities */ - for (i = 0; HtmlEntities[i].escaped; i++) { - gssize len = strlen (HtmlEntities[i].escaped); - if (!strncmp (text, HtmlEntities[i].escaped, len)) { - unescaped = - g_string_append_unichar (unescaped, HtmlEntities[i].unescaped); - text += len; + for (i = 0; i < G_N_ELEMENTS (HtmlEntities); i++) { + const EntityMap *entity = &HtmlEntities[i]; + guint8 escaped_len = entity->escaped_len; + + if (!strncmp (text, entity->escaped, escaped_len) + && text[escaped_len] == ';') { + unescaped = g_string_append_unichar (unescaped, entity->unescaped); + text += escaped_len + 1; goto next; } }