samiparse: micro-optimise entity handling

Avoid relocations and hard-code entity string length
in the struct, since we basically get it for free here.

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/2685>
This commit is contained in:
Tim-Philipp Müller 2022-06-30 00:31:24 +01:00 committed by GStreamer Marge Bot
parent 84a3b0ef87
commit 270d23c8e0

View file

@ -88,272 +88,273 @@ html_context_free (HtmlContext * ctxt)
g_free (ctxt);
}
/* One entry of the entity lookup tables.
 *
 * Two versions of the definition are interleaved in this span: the earlier
 * `struct EntityMap` (a code point plus a pointer to the entity name) and
 * the later anonymous struct typedef'd as EntityMap, which stores the name
 * inline together with its length.
 */
struct EntityMap
typedef struct
{
const gunichar unescaped;
const gchar *escaped;
gunichar unescaped:24;          /* code point the entity stands for (24-bit bit-field) */
guint8 escaped_len;             /* number of characters of the name held in escaped[] */
gchar escaped[8];               /* entity name; an 8-character name such as "thetasym"
                                 * fills the array with no terminating NUL, so use
                                 * escaped_len rather than strlen() */
} EntityMap;
/* Expands to three comma-separated initializers: the code point, the length
 * of the entity name, and the name itself.  The length is the sizeof of NAME
 * minus one (the terminating NUL), so NAME has to be a string literal. */
#define ENTITY(code_point, name) code_point, sizeof (name) - 1, name
/* XML entity names (without the trailing ';') and the characters they stand
 * for.  The table has no sentinel entry. */
static const EntityMap XmlEntities[] = {
  {ENTITY (0x22, "quot")},      /* " */
  {ENTITY (0x26, "amp")},       /* & */
  {ENTITY (0x27, "apos")},      /* ' */
  {ENTITY (0x3C, "lt")},        /* < */
  {ENTITY (0x3E, "gt")},        /* > */
};
/* XML entity names (including the trailing ';') and the characters they
 * stand for.  The final entry, whose name is NULL, marks the end of the
 * table. */
struct EntityMap XmlEntities[] = {
  {0x22, "quot;"},              /* " */
  {0x26, "amp;"},               /* & */
  {0x27, "apos;"},              /* ' */
  {0x3C, "lt;"},                /* < */
  {0x3E, "gt;"},                /* > */
  {0, NULL},
};
/* HTML named character entities mapped to their Unicode code points.
 *
 * Two versions of this table are interleaved in this span.  The earlier one
 * lists {code point, "name;"} pairs, where the ';' is part of the stored
 * name, and ends with a {0, NULL} sentinel.  The later one is built with
 * ENTITY (code point, "name"), stores the name without the ';', and has no
 * sentinel entry.  "nbsp" (160) is commented out in both; according to the
 * note below it is handled manually.
 */
struct EntityMap HtmlEntities[] = {
/* nbsp will handle manually
static const EntityMap HtmlEntities[] = {
/* nbsp we'll handle manually
{ 160, "nbsp;" }, */
{161, "iexcl;"},
{162, "cent;"},
{163, "pound;"},
{164, "curren;"},
{165, "yen;"},
{166, "brvbar;"},
{167, "sect;"},
{168, "uml;"},
{169, "copy;"},
{170, "ordf;"},
{171, "laquo;"},
{172, "not;"},
{173, "shy;"},
{174, "reg;"},
{175, "macr;"},
{176, "deg;"},
{177, "plusmn;"},
{178, "sup2;"},
{179, "sup3;"},
{180, "acute;"},
{181, "micro;"},
{182, "para;"},
{183, "middot;"},
{184, "cedil;"},
{185, "sup1;"},
{186, "ordm;"},
{187, "raquo;"},
{188, "frac14;"},
{189, "frac12;"},
{190, "frac34;"},
{191, "iquest;"},
{192, "Agrave;"},
{193, "Aacute;"},
{194, "Acirc;"},
{195, "Atilde;"},
{196, "Auml;"},
{197, "Aring;"},
{198, "AElig;"},
{199, "Ccedil;"},
{200, "Egrave;"},
{201, "Eacute;"},
{202, "Ecirc;"},
{203, "Euml;"},
{204, "Igrave;"},
{205, "Iacute;"},
{206, "Icirc;"},
{207, "Iuml;"},
{208, "ETH;"},
{209, "Ntilde;"},
{210, "Ograve;"},
{211, "Oacute;"},
{212, "Ocirc;"},
{213, "Otilde;"},
{214, "Ouml;"},
{215, "times;"},
{216, "Oslash;"},
{217, "Ugrave;"},
{218, "Uacute;"},
{219, "Ucirc;"},
{220, "Uuml;"},
{221, "Yacute;"},
{222, "THORN;"},
{223, "szlig;"},
{224, "agrave;"},
{225, "aacute;"},
{226, "acirc;"},
{227, "atilde;"},
{228, "auml;"},
{229, "aring;"},
{230, "aelig;"},
{231, "ccedil;"},
{232, "egrave;"},
{233, "eacute;"},
{234, "ecirc;"},
{235, "euml;"},
{236, "igrave;"},
{237, "iacute;"},
{238, "icirc;"},
{239, "iuml;"},
{240, "eth;"},
{241, "ntilde;"},
{242, "ograve;"},
{243, "oacute;"},
{244, "ocirc;"},
{245, "otilde;"},
{246, "ouml;"},
{247, "divide;"},
{248, "oslash;"},
{249, "ugrave;"},
{250, "uacute;"},
{251, "ucirc;"},
{252, "uuml;"},
{253, "yacute;"},
{254, "thorn;"},
{255, "yuml;"},
{338, "OElig;"},
{339, "oelig;"},
{352, "Scaron;"},
{353, "scaron;"},
{376, "Yuml;"},
{402, "fnof;"},
{710, "circ;"},
{732, "tilde;"},
{913, "Alpha;"},
{914, "Beta;"},
{915, "Gamma;"},
{916, "Delta;"},
{917, "Epsilon;"},
{918, "Zeta;"},
{919, "Eta;"},
{920, "Theta;"},
{921, "Iota;"},
{922, "Kappa;"},
{923, "Lambda;"},
{924, "Mu;"},
{925, "Nu;"},
{926, "Xi;"},
{927, "Omicron;"},
{928, "Pi;"},
{929, "Rho;"},
{931, "Sigma;"},
{932, "Tau;"},
{933, "Upsilon;"},
{934, "Phi;"},
{935, "Chi;"},
{936, "Psi;"},
{937, "Omega;"},
{945, "alpha;"},
{946, "beta;"},
{947, "gamma;"},
{948, "delta;"},
{949, "epsilon;"},
{950, "zeta;"},
{951, "eta;"},
{952, "theta;"},
{953, "iota;"},
{954, "kappa;"},
{955, "lambda;"},
{956, "mu;"},
{957, "nu;"},
{958, "xi;"},
{959, "omicron;"},
{960, "pi;"},
{961, "rho;"},
{962, "sigmaf;"},
{963, "sigma;"},
{964, "tau;"},
{965, "upsilon;"},
{966, "phi;"},
{967, "chi;"},
{968, "psi;"},
{969, "omega;"},
{977, "thetasym;"},
{978, "upsih;"},
{982, "piv;"},
{8194, "ensp;"},
{8195, "emsp;"},
{8201, "thinsp;"},
{8204, "zwnj;"},
{8205, "zwj;"},
{8206, "lrm;"},
{8207, "rlm;"},
{8211, "ndash;"},
{8212, "mdash;"},
{8216, "lsquo;"},
{8217, "rsquo;"},
{8218, "sbquo;"},
{8220, "ldquo;"},
{8221, "rdquo;"},
{8222, "bdquo;"},
{8224, "dagger;"},
{8225, "Dagger;"},
{8226, "bull;"},
{8230, "hellip;"},
{8240, "permil;"},
{8242, "prime;"},
{8243, "Prime;"},
{8249, "lsaquo;"},
{8250, "rsaquo;"},
{8254, "oline;"},
{8260, "frasl;"},
{8364, "euro;"},
{8465, "image;"},
{8472, "weierp;"},
{8476, "real;"},
{8482, "trade;"},
{8501, "alefsym;"},
{8592, "larr;"},
{8593, "uarr;"},
{8594, "rarr;"},
{8595, "darr;"},
{8596, "harr;"},
{8629, "crarr;"},
{8656, "lArr;"},
{8657, "uArr;"},
{8658, "rArr;"},
{8659, "dArr;"},
{8660, "hArr;"},
{8704, "forall;"},
{8706, "part;"},
{8707, "exist;"},
{8709, "empty;"},
{8711, "nabla;"},
{8712, "isin;"},
{8713, "notin;"},
{8715, "ni;"},
{8719, "prod;"},
{8721, "sum;"},
{8722, "minus;"},
{8727, "lowast;"},
{8730, "radic;"},
{8733, "prop;"},
{8734, "infin;"},
{8736, "ang;"},
{8743, "and;"},
{8744, "or;"},
{8745, "cap;"},
{8746, "cup;"},
{8747, "int;"},
{8756, "there4;"},
{8764, "sim;"},
{8773, "cong;"},
{8776, "asymp;"},
{8800, "ne;"},
{8801, "equiv;"},
{8804, "le;"},
{8805, "ge;"},
{8834, "sub;"},
{8835, "sup;"},
{8836, "nsub;"},
{8838, "sube;"},
{8839, "supe;"},
{8853, "oplus;"},
{8855, "otimes;"},
{8869, "perp;"},
{8901, "sdot;"},
{8968, "lceil;"},
{8969, "rceil;"},
{8970, "lfloor;"},
{8971, "rfloor;"},
{9001, "lang;"},
{9002, "rang;"},
{9674, "loz;"},
{9824, "spades;"},
{9827, "clubs;"},
{9829, "hearts;"},
{9830, "diams;"},
{0, NULL},
{ENTITY (161, "iexcl")},
{ENTITY (162, "cent")},
{ENTITY (163, "pound")},
{ENTITY (164, "curren")},
{ENTITY (165, "yen")},
{ENTITY (166, "brvbar")},
{ENTITY (167, "sect")},
{ENTITY (168, "uml")},
{ENTITY (169, "copy")},
{ENTITY (170, "ordf")},
{ENTITY (171, "laquo")},
{ENTITY (172, "not")},
{ENTITY (173, "shy")},
{ENTITY (174, "reg")},
{ENTITY (175, "macr")},
{ENTITY (176, "deg")},
{ENTITY (177, "plusmn")},
{ENTITY (178, "sup2")},
{ENTITY (179, "sup3")},
{ENTITY (180, "acute")},
{ENTITY (181, "micro")},
{ENTITY (182, "para")},
{ENTITY (183, "middot")},
{ENTITY (184, "cedil")},
{ENTITY (185, "sup1")},
{ENTITY (186, "ordm")},
{ENTITY (187, "raquo")},
{ENTITY (188, "frac14")},
{ENTITY (189, "frac12")},
{ENTITY (190, "frac34")},
{ENTITY (191, "iquest")},
{ENTITY (192, "Agrave")},
{ENTITY (193, "Aacute")},
{ENTITY (194, "Acirc")},
{ENTITY (195, "Atilde")},
{ENTITY (196, "Auml")},
{ENTITY (197, "Aring")},
{ENTITY (198, "AElig")},
{ENTITY (199, "Ccedil")},
{ENTITY (200, "Egrave")},
{ENTITY (201, "Eacute")},
{ENTITY (202, "Ecirc")},
{ENTITY (203, "Euml")},
{ENTITY (204, "Igrave")},
{ENTITY (205, "Iacute")},
{ENTITY (206, "Icirc")},
{ENTITY (207, "Iuml")},
{ENTITY (208, "ETH")},
{ENTITY (209, "Ntilde")},
{ENTITY (210, "Ograve")},
{ENTITY (211, "Oacute")},
{ENTITY (212, "Ocirc")},
{ENTITY (213, "Otilde")},
{ENTITY (214, "Ouml")},
{ENTITY (215, "times")},
{ENTITY (216, "Oslash")},
{ENTITY (217, "Ugrave")},
{ENTITY (218, "Uacute")},
{ENTITY (219, "Ucirc")},
{ENTITY (220, "Uuml")},
{ENTITY (221, "Yacute")},
{ENTITY (222, "THORN")},
{ENTITY (223, "szlig")},
{ENTITY (224, "agrave")},
{ENTITY (225, "aacute")},
{ENTITY (226, "acirc")},
{ENTITY (227, "atilde")},
{ENTITY (228, "auml")},
{ENTITY (229, "aring")},
{ENTITY (230, "aelig")},
{ENTITY (231, "ccedil")},
{ENTITY (232, "egrave")},
{ENTITY (233, "eacute")},
{ENTITY (234, "ecirc")},
{ENTITY (235, "euml")},
{ENTITY (236, "igrave")},
{ENTITY (237, "iacute")},
{ENTITY (238, "icirc")},
{ENTITY (239, "iuml")},
{ENTITY (240, "eth")},
{ENTITY (241, "ntilde")},
{ENTITY (242, "ograve")},
{ENTITY (243, "oacute")},
{ENTITY (244, "ocirc")},
{ENTITY (245, "otilde")},
{ENTITY (246, "ouml")},
{ENTITY (247, "divide")},
{ENTITY (248, "oslash")},
{ENTITY (249, "ugrave")},
{ENTITY (250, "uacute")},
{ENTITY (251, "ucirc")},
{ENTITY (252, "uuml")},
{ENTITY (253, "yacute")},
{ENTITY (254, "thorn")},
{ENTITY (255, "yuml")},
{ENTITY (338, "OElig")},
{ENTITY (339, "oelig")},
{ENTITY (352, "Scaron")},
{ENTITY (353, "scaron")},
{ENTITY (376, "Yuml")},
{ENTITY (402, "fnof")},
{ENTITY (710, "circ")},
{ENTITY (732, "tilde")},
{ENTITY (913, "Alpha")},
{ENTITY (914, "Beta")},
{ENTITY (915, "Gamma")},
{ENTITY (916, "Delta")},
{ENTITY (917, "Epsilon")},
{ENTITY (918, "Zeta")},
{ENTITY (919, "Eta")},
{ENTITY (920, "Theta")},
{ENTITY (921, "Iota")},
{ENTITY (922, "Kappa")},
{ENTITY (923, "Lambda")},
{ENTITY (924, "Mu")},
{ENTITY (925, "Nu")},
{ENTITY (926, "Xi")},
{ENTITY (927, "Omicron")},
{ENTITY (928, "Pi")},
{ENTITY (929, "Rho")},
{ENTITY (931, "Sigma")},
{ENTITY (932, "Tau")},
{ENTITY (933, "Upsilon")},
{ENTITY (934, "Phi")},
{ENTITY (935, "Chi")},
{ENTITY (936, "Psi")},
{ENTITY (937, "Omega")},
{ENTITY (945, "alpha")},
{ENTITY (946, "beta")},
{ENTITY (947, "gamma")},
{ENTITY (948, "delta")},
{ENTITY (949, "epsilon")},
{ENTITY (950, "zeta")},
{ENTITY (951, "eta")},
{ENTITY (952, "theta")},
{ENTITY (953, "iota")},
{ENTITY (954, "kappa")},
{ENTITY (955, "lambda")},
{ENTITY (956, "mu")},
{ENTITY (957, "nu")},
{ENTITY (958, "xi")},
{ENTITY (959, "omicron")},
{ENTITY (960, "pi")},
{ENTITY (961, "rho")},
{ENTITY (962, "sigmaf")},
{ENTITY (963, "sigma")},
{ENTITY (964, "tau")},
{ENTITY (965, "upsilon")},
{ENTITY (966, "phi")},
{ENTITY (967, "chi")},
{ENTITY (968, "psi")},
{ENTITY (969, "omega")},
{ENTITY (977, "thetasym")},
{ENTITY (978, "upsih")},
{ENTITY (982, "piv")},
{ENTITY (8194, "ensp")},
{ENTITY (8195, "emsp")},
{ENTITY (8201, "thinsp")},
{ENTITY (8204, "zwnj")},
{ENTITY (8205, "zwj")},
{ENTITY (8206, "lrm")},
{ENTITY (8207, "rlm")},
{ENTITY (8211, "ndash")},
{ENTITY (8212, "mdash")},
{ENTITY (8216, "lsquo")},
{ENTITY (8217, "rsquo")},
{ENTITY (8218, "sbquo")},
{ENTITY (8220, "ldquo")},
{ENTITY (8221, "rdquo")},
{ENTITY (8222, "bdquo")},
{ENTITY (8224, "dagger")},
{ENTITY (8225, "Dagger")},
{ENTITY (8226, "bull")},
{ENTITY (8230, "hellip")},
{ENTITY (8240, "permil")},
{ENTITY (8242, "prime")},
{ENTITY (8243, "Prime")},
{ENTITY (8249, "lsaquo")},
{ENTITY (8250, "rsaquo")},
{ENTITY (8254, "oline")},
{ENTITY (8260, "frasl")},
{ENTITY (8364, "euro")},
{ENTITY (8465, "image")},
{ENTITY (8472, "weierp")},
{ENTITY (8476, "real")},
{ENTITY (8482, "trade")},
{ENTITY (8501, "alefsym")},
{ENTITY (8592, "larr")},
{ENTITY (8593, "uarr")},
{ENTITY (8594, "rarr")},
{ENTITY (8595, "darr")},
{ENTITY (8596, "harr")},
{ENTITY (8629, "crarr")},
{ENTITY (8656, "lArr")},
{ENTITY (8657, "uArr")},
{ENTITY (8658, "rArr")},
{ENTITY (8659, "dArr")},
{ENTITY (8660, "hArr")},
{ENTITY (8704, "forall")},
{ENTITY (8706, "part")},
{ENTITY (8707, "exist")},
{ENTITY (8709, "empty")},
{ENTITY (8711, "nabla")},
{ENTITY (8712, "isin")},
{ENTITY (8713, "notin")},
{ENTITY (8715, "ni")},
{ENTITY (8719, "prod")},
{ENTITY (8721, "sum")},
{ENTITY (8722, "minus")},
{ENTITY (8727, "lowast")},
{ENTITY (8730, "radic")},
{ENTITY (8733, "prop")},
{ENTITY (8734, "infin")},
{ENTITY (8736, "ang")},
{ENTITY (8743, "and")},
{ENTITY (8744, "or")},
{ENTITY (8745, "cap")},
{ENTITY (8746, "cup")},
{ENTITY (8747, "int")},
{ENTITY (8756, "there4")},
{ENTITY (8764, "sim")},
{ENTITY (8773, "cong")},
{ENTITY (8776, "asymp")},
{ENTITY (8800, "ne")},
{ENTITY (8801, "equiv")},
{ENTITY (8804, "le")},
{ENTITY (8805, "ge")},
{ENTITY (8834, "sub")},
{ENTITY (8835, "sup")},
{ENTITY (8836, "nsub")},
{ENTITY (8838, "sube")},
{ENTITY (8839, "supe")},
{ENTITY (8853, "oplus")},
{ENTITY (8855, "otimes")},
{ENTITY (8869, "perp")},
{ENTITY (8901, "sdot")},
{ENTITY (8968, "lceil")},
{ENTITY (8969, "rceil")},
{ENTITY (8970, "lfloor")},
{ENTITY (8971, "rfloor")},
{ENTITY (9001, "lang")},
{ENTITY (9002, "rang")},
{ENTITY (9674, "loz")},
{ENTITY (9824, "spades")},
{ENTITY (9827, "clubs")},
{ENTITY (9829, "hearts")},
{ENTITY (9830, "diams")},
};
static gchar *
@ -377,24 +378,30 @@ unescape_string (const gchar * text)
}
/* pass xml entities. these will be processed as pango markup */
for (i = 0; XmlEntities[i].escaped; i++) {
gssize len = strlen (XmlEntities[i].escaped);
if (!g_ascii_strncasecmp (text, XmlEntities[i].escaped, len)) {
for (i = 0; i < G_N_ELEMENTS (XmlEntities); i++) {
const EntityMap *entity = &XmlEntities[i];
guint8 escaped_len = entity->escaped_len;
if (!g_ascii_strncasecmp (text, entity->escaped, escaped_len)
&& text[escaped_len] == ';') {
unescaped = g_string_append_c (unescaped, '&');
unescaped =
g_string_append_len (unescaped, XmlEntities[i].escaped, len);
text += len;
g_string_append_len (unescaped, entity->escaped, escaped_len);
unescaped = g_string_append_c (unescaped, ';');
text += escaped_len + 1;
goto next;
}
}
/* convert html entities */
for (i = 0; HtmlEntities[i].escaped; i++) {
gssize len = strlen (HtmlEntities[i].escaped);
if (!strncmp (text, HtmlEntities[i].escaped, len)) {
unescaped =
g_string_append_unichar (unescaped, HtmlEntities[i].unescaped);
text += len;
for (i = 0; i < G_N_ELEMENTS (HtmlEntities); i++) {
const EntityMap *entity = &HtmlEntities[i];
guint8 escaped_len = entity->escaped_len;
if (!strncmp (text, entity->escaped, escaped_len)
&& text[escaped_len] == ';') {
unescaped = g_string_append_unichar (unescaped, entity->unescaped);
text += escaped_len + 1;
goto next;
}
}