samiparse: micro-optimise entity handling

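Avoid relocations by storing each entity name inline in the struct
instead of behind a pointer, and hard-code the entity string length
in the struct too (computed with sizeof at compile time), since we
basically get it for free here: the code point becomes a 24-bit
bit-field sitting next to an 8-bit length.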

Part-of: <https://gitlab.freedesktop.org/gstreamer/gstreamer/-/merge_requests/2685>
This commit is contained in:
Tim-Philipp Müller 2022-06-30 00:31:24 +01:00 committed by GStreamer Marge Bot
parent 84a3b0ef87
commit 270d23c8e0

View file

@ -88,272 +88,273 @@ html_context_free (HtmlContext * ctxt)
g_free (ctxt); g_free (ctxt);
} }
struct EntityMap typedef struct
{ {
const gunichar unescaped; gunichar unescaped:24;
const gchar *escaped; guint8 escaped_len;
gchar escaped[8];
} EntityMap;
#define ENTITY(unicode,ent) unicode,sizeof(ent)-1,ent
static const EntityMap XmlEntities[] = {
{ENTITY (34, "quot")},
{ENTITY (38, "amp")},
{ENTITY (39, "apos")},
{ENTITY (60, "lt")},
{ENTITY (62, "gt")},
}; };
struct EntityMap XmlEntities[] = { static const EntityMap HtmlEntities[] = {
{34, "quot;"}, /* nbsp we'll handle manually
{38, "amp;"},
{39, "apos;"},
{60, "lt;"},
{62, "gt;"},
{0, NULL},
};
struct EntityMap HtmlEntities[] = {
/* nbsp will handle manually
{ 160, "nbsp;" }, */ { 160, "nbsp;" }, */
{161, "iexcl;"}, {ENTITY (161, "iexcl")},
{162, "cent;"}, {ENTITY (162, "cent")},
{163, "pound;"}, {ENTITY (163, "pound")},
{164, "curren;"}, {ENTITY (164, "curren")},
{165, "yen;"}, {ENTITY (165, "yen")},
{166, "brvbar;"}, {ENTITY (166, "brvbar")},
{167, "sect;"}, {ENTITY (167, "sect")},
{168, "uml;"}, {ENTITY (168, "uml")},
{169, "copy;"}, {ENTITY (169, "copy")},
{170, "ordf;"}, {ENTITY (170, "ordf")},
{171, "laquo;"}, {ENTITY (171, "laquo")},
{172, "not;"}, {ENTITY (172, "not")},
{173, "shy;"}, {ENTITY (173, "shy")},
{174, "reg;"}, {ENTITY (174, "reg")},
{175, "macr;"}, {ENTITY (175, "macr")},
{176, "deg;"}, {ENTITY (176, "deg")},
{177, "plusmn;"}, {ENTITY (177, "plusmn")},
{178, "sup2;"}, {ENTITY (178, "sup2")},
{179, "sup3;"}, {ENTITY (179, "sup3")},
{180, "acute;"}, {ENTITY (180, "acute")},
{181, "micro;"}, {ENTITY (181, "micro")},
{182, "para;"}, {ENTITY (182, "para")},
{183, "middot;"}, {ENTITY (183, "middot")},
{184, "cedil;"}, {ENTITY (184, "cedil")},
{185, "sup1;"}, {ENTITY (185, "sup1")},
{186, "ordm;"}, {ENTITY (186, "ordm")},
{187, "raquo;"}, {ENTITY (187, "raquo")},
{188, "frac14;"}, {ENTITY (188, "frac14")},
{189, "frac12;"}, {ENTITY (189, "frac12")},
{190, "frac34;"}, {ENTITY (190, "frac34")},
{191, "iquest;"}, {ENTITY (191, "iquest")},
{192, "Agrave;"}, {ENTITY (192, "Agrave")},
{193, "Aacute;"}, {ENTITY (193, "Aacute")},
{194, "Acirc;"}, {ENTITY (194, "Acirc")},
{195, "Atilde;"}, {ENTITY (195, "Atilde")},
{196, "Auml;"}, {ENTITY (196, "Auml")},
{197, "Aring;"}, {ENTITY (197, "Aring")},
{198, "AElig;"}, {ENTITY (198, "AElig")},
{199, "Ccedil;"}, {ENTITY (199, "Ccedil")},
{200, "Egrave;"}, {ENTITY (200, "Egrave")},
{201, "Eacute;"}, {ENTITY (201, "Eacute")},
{202, "Ecirc;"}, {ENTITY (202, "Ecirc")},
{203, "Euml;"}, {ENTITY (203, "Euml")},
{204, "Igrave;"}, {ENTITY (204, "Igrave")},
{205, "Iacute;"}, {ENTITY (205, "Iacute")},
{206, "Icirc;"}, {ENTITY (206, "Icirc")},
{207, "Iuml;"}, {ENTITY (207, "Iuml")},
{208, "ETH;"}, {ENTITY (208, "ETH")},
{209, "Ntilde;"}, {ENTITY (209, "Ntilde")},
{210, "Ograve;"}, {ENTITY (210, "Ograve")},
{211, "Oacute;"}, {ENTITY (211, "Oacute")},
{212, "Ocirc;"}, {ENTITY (212, "Ocirc")},
{213, "Otilde;"}, {ENTITY (213, "Otilde")},
{214, "Ouml;"}, {ENTITY (214, "Ouml")},
{215, "times;"}, {ENTITY (215, "times")},
{216, "Oslash;"}, {ENTITY (216, "Oslash")},
{217, "Ugrave;"}, {ENTITY (217, "Ugrave")},
{218, "Uacute;"}, {ENTITY (218, "Uacute")},
{219, "Ucirc;"}, {ENTITY (219, "Ucirc")},
{220, "Uuml;"}, {ENTITY (220, "Uuml")},
{221, "Yacute;"}, {ENTITY (221, "Yacute")},
{222, "THORN;"}, {ENTITY (222, "THORN")},
{223, "szlig;"}, {ENTITY (223, "szlig")},
{224, "agrave;"}, {ENTITY (224, "agrave")},
{225, "aacute;"}, {ENTITY (225, "aacute")},
{226, "acirc;"}, {ENTITY (226, "acirc")},
{227, "atilde;"}, {ENTITY (227, "atilde")},
{228, "auml;"}, {ENTITY (228, "auml")},
{229, "aring;"}, {ENTITY (229, "aring")},
{230, "aelig;"}, {ENTITY (230, "aelig")},
{231, "ccedil;"}, {ENTITY (231, "ccedil")},
{232, "egrave;"}, {ENTITY (232, "egrave")},
{233, "eacute;"}, {ENTITY (233, "eacute")},
{234, "ecirc;"}, {ENTITY (234, "ecirc")},
{235, "euml;"}, {ENTITY (235, "euml")},
{236, "igrave;"}, {ENTITY (236, "igrave")},
{237, "iacute;"}, {ENTITY (237, "iacute")},
{238, "icirc;"}, {ENTITY (238, "icirc")},
{239, "iuml;"}, {ENTITY (239, "iuml")},
{240, "eth;"}, {ENTITY (240, "eth")},
{241, "ntilde;"}, {ENTITY (241, "ntilde")},
{242, "ograve;"}, {ENTITY (242, "ograve")},
{243, "oacute;"}, {ENTITY (243, "oacute")},
{244, "ocirc;"}, {ENTITY (244, "ocirc")},
{245, "otilde;"}, {ENTITY (245, "otilde")},
{246, "ouml;"}, {ENTITY (246, "ouml")},
{247, "divide;"}, {ENTITY (247, "divide")},
{248, "oslash;"}, {ENTITY (248, "oslash")},
{249, "ugrave;"}, {ENTITY (249, "ugrave")},
{250, "uacute;"}, {ENTITY (250, "uacute")},
{251, "ucirc;"}, {ENTITY (251, "ucirc")},
{252, "uuml;"}, {ENTITY (252, "uuml")},
{253, "yacute;"}, {ENTITY (253, "yacute")},
{254, "thorn;"}, {ENTITY (254, "thorn")},
{255, "yuml;"}, {ENTITY (255, "yuml")},
{338, "OElig;"}, {ENTITY (338, "OElig")},
{339, "oelig;"}, {ENTITY (339, "oelig")},
{352, "Scaron;"}, {ENTITY (352, "Scaron")},
{353, "scaron;"}, {ENTITY (353, "scaron")},
{376, "Yuml;"}, {ENTITY (376, "Yuml")},
{402, "fnof;"}, {ENTITY (402, "fnof")},
{710, "circ;"}, {ENTITY (710, "circ")},
{732, "tilde;"}, {ENTITY (732, "tilde")},
{913, "Alpha;"}, {ENTITY (913, "Alpha")},
{914, "Beta;"}, {ENTITY (914, "Beta")},
{915, "Gamma;"}, {ENTITY (915, "Gamma")},
{916, "Delta;"}, {ENTITY (916, "Delta")},
{917, "Epsilon;"}, {ENTITY (917, "Epsilon")},
{918, "Zeta;"}, {ENTITY (918, "Zeta")},
{919, "Eta;"}, {ENTITY (919, "Eta")},
{920, "Theta;"}, {ENTITY (920, "Theta")},
{921, "Iota;"}, {ENTITY (921, "Iota")},
{922, "Kappa;"}, {ENTITY (922, "Kappa")},
{923, "Lambda;"}, {ENTITY (923, "Lambda")},
{924, "Mu;"}, {ENTITY (924, "Mu")},
{925, "Nu;"}, {ENTITY (925, "Nu")},
{926, "Xi;"}, {ENTITY (926, "Xi")},
{927, "Omicron;"}, {ENTITY (927, "Omicron")},
{928, "Pi;"}, {ENTITY (928, "Pi")},
{929, "Rho;"}, {ENTITY (929, "Rho")},
{931, "Sigma;"}, {ENTITY (931, "Sigma")},
{932, "Tau;"}, {ENTITY (932, "Tau")},
{933, "Upsilon;"}, {ENTITY (933, "Upsilon")},
{934, "Phi;"}, {ENTITY (934, "Phi")},
{935, "Chi;"}, {ENTITY (935, "Chi")},
{936, "Psi;"}, {ENTITY (936, "Psi")},
{937, "Omega;"}, {ENTITY (937, "Omega")},
{945, "alpha;"}, {ENTITY (945, "alpha")},
{946, "beta;"}, {ENTITY (946, "beta")},
{947, "gamma;"}, {ENTITY (947, "gamma")},
{948, "delta;"}, {ENTITY (948, "delta")},
{949, "epsilon;"}, {ENTITY (949, "epsilon")},
{950, "zeta;"}, {ENTITY (950, "zeta")},
{951, "eta;"}, {ENTITY (951, "eta")},
{952, "theta;"}, {ENTITY (952, "theta")},
{953, "iota;"}, {ENTITY (953, "iota")},
{954, "kappa;"}, {ENTITY (954, "kappa")},
{955, "lambda;"}, {ENTITY (955, "lambda")},
{956, "mu;"}, {ENTITY (956, "mu")},
{957, "nu;"}, {ENTITY (957, "nu")},
{958, "xi;"}, {ENTITY (958, "xi")},
{959, "omicron;"}, {ENTITY (959, "omicron")},
{960, "pi;"}, {ENTITY (960, "pi")},
{961, "rho;"}, {ENTITY (961, "rho")},
{962, "sigmaf;"}, {ENTITY (962, "sigmaf")},
{963, "sigma;"}, {ENTITY (963, "sigma")},
{964, "tau;"}, {ENTITY (964, "tau")},
{965, "upsilon;"}, {ENTITY (965, "upsilon")},
{966, "phi;"}, {ENTITY (966, "phi")},
{967, "chi;"}, {ENTITY (967, "chi")},
{968, "psi;"}, {ENTITY (968, "psi")},
{969, "omega;"}, {ENTITY (969, "omega")},
{977, "thetasym;"}, {ENTITY (977, "thetasym")},
{978, "upsih;"}, {ENTITY (978, "upsih")},
{982, "piv;"}, {ENTITY (982, "piv")},
{8194, "ensp;"}, {ENTITY (8194, "ensp")},
{8195, "emsp;"}, {ENTITY (8195, "emsp")},
{8201, "thinsp;"}, {ENTITY (8201, "thinsp")},
{8204, "zwnj;"}, {ENTITY (8204, "zwnj")},
{8205, "zwj;"}, {ENTITY (8205, "zwj")},
{8206, "lrm;"}, {ENTITY (8206, "lrm")},
{8207, "rlm;"}, {ENTITY (8207, "rlm")},
{8211, "ndash;"}, {ENTITY (8211, "ndash")},
{8212, "mdash;"}, {ENTITY (8212, "mdash")},
{8216, "lsquo;"}, {ENTITY (8216, "lsquo")},
{8217, "rsquo;"}, {ENTITY (8217, "rsquo")},
{8218, "sbquo;"}, {ENTITY (8218, "sbquo")},
{8220, "ldquo;"}, {ENTITY (8220, "ldquo")},
{8221, "rdquo;"}, {ENTITY (8221, "rdquo")},
{8222, "bdquo;"}, {ENTITY (8222, "bdquo")},
{8224, "dagger;"}, {ENTITY (8224, "dagger")},
{8225, "Dagger;"}, {ENTITY (8225, "Dagger")},
{8226, "bull;"}, {ENTITY (8226, "bull")},
{8230, "hellip;"}, {ENTITY (8230, "hellip")},
{8240, "permil;"}, {ENTITY (8240, "permil")},
{8242, "prime;"}, {ENTITY (8242, "prime")},
{8243, "Prime;"}, {ENTITY (8243, "Prime")},
{8249, "lsaquo;"}, {ENTITY (8249, "lsaquo")},
{8250, "rsaquo;"}, {ENTITY (8250, "rsaquo")},
{8254, "oline;"}, {ENTITY (8254, "oline")},
{8260, "frasl;"}, {ENTITY (8260, "frasl")},
{8364, "euro;"}, {ENTITY (8364, "euro")},
{8465, "image;"}, {ENTITY (8465, "image")},
{8472, "weierp;"}, {ENTITY (8472, "weierp")},
{8476, "real;"}, {ENTITY (8476, "real")},
{8482, "trade;"}, {ENTITY (8482, "trade")},
{8501, "alefsym;"}, {ENTITY (8501, "alefsym")},
{8592, "larr;"}, {ENTITY (8592, "larr")},
{8593, "uarr;"}, {ENTITY (8593, "uarr")},
{8594, "rarr;"}, {ENTITY (8594, "rarr")},
{8595, "darr;"}, {ENTITY (8595, "darr")},
{8596, "harr;"}, {ENTITY (8596, "harr")},
{8629, "crarr;"}, {ENTITY (8629, "crarr")},
{8656, "lArr;"}, {ENTITY (8656, "lArr")},
{8657, "uArr;"}, {ENTITY (8657, "uArr")},
{8658, "rArr;"}, {ENTITY (8658, "rArr")},
{8659, "dArr;"}, {ENTITY (8659, "dArr")},
{8660, "hArr;"}, {ENTITY (8660, "hArr")},
{8704, "forall;"}, {ENTITY (8704, "forall")},
{8706, "part;"}, {ENTITY (8706, "part")},
{8707, "exist;"}, {ENTITY (8707, "exist")},
{8709, "empty;"}, {ENTITY (8709, "empty")},
{8711, "nabla;"}, {ENTITY (8711, "nabla")},
{8712, "isin;"}, {ENTITY (8712, "isin")},
{8713, "notin;"}, {ENTITY (8713, "notin")},
{8715, "ni;"}, {ENTITY (8715, "ni")},
{8719, "prod;"}, {ENTITY (8719, "prod")},
{8721, "sum;"}, {ENTITY (8721, "sum")},
{8722, "minus;"}, {ENTITY (8722, "minus")},
{8727, "lowast;"}, {ENTITY (8727, "lowast")},
{8730, "radic;"}, {ENTITY (8730, "radic")},
{8733, "prop;"}, {ENTITY (8733, "prop")},
{8734, "infin;"}, {ENTITY (8734, "infin")},
{8736, "ang;"}, {ENTITY (8736, "ang")},
{8743, "and;"}, {ENTITY (8743, "and")},
{8744, "or;"}, {ENTITY (8744, "or")},
{8745, "cap;"}, {ENTITY (8745, "cap")},
{8746, "cup;"}, {ENTITY (8746, "cup")},
{8747, "int;"}, {ENTITY (8747, "int")},
{8756, "there4;"}, {ENTITY (8756, "there4")},
{8764, "sim;"}, {ENTITY (8764, "sim")},
{8773, "cong;"}, {ENTITY (8773, "cong")},
{8776, "asymp;"}, {ENTITY (8776, "asymp")},
{8800, "ne;"}, {ENTITY (8800, "ne")},
{8801, "equiv;"}, {ENTITY (8801, "equiv")},
{8804, "le;"}, {ENTITY (8804, "le")},
{8805, "ge;"}, {ENTITY (8805, "ge")},
{8834, "sub;"}, {ENTITY (8834, "sub")},
{8835, "sup;"}, {ENTITY (8835, "sup")},
{8836, "nsub;"}, {ENTITY (8836, "nsub")},
{8838, "sube;"}, {ENTITY (8838, "sube")},
{8839, "supe;"}, {ENTITY (8839, "supe")},
{8853, "oplus;"}, {ENTITY (8853, "oplus")},
{8855, "otimes;"}, {ENTITY (8855, "otimes")},
{8869, "perp;"}, {ENTITY (8869, "perp")},
{8901, "sdot;"}, {ENTITY (8901, "sdot")},
{8968, "lceil;"}, {ENTITY (8968, "lceil")},
{8969, "rceil;"}, {ENTITY (8969, "rceil")},
{8970, "lfloor;"}, {ENTITY (8970, "lfloor")},
{8971, "rfloor;"}, {ENTITY (8971, "rfloor")},
{9001, "lang;"}, {ENTITY (9001, "lang")},
{9002, "rang;"}, {ENTITY (9002, "rang")},
{9674, "loz;"}, {ENTITY (9674, "loz")},
{9824, "spades;"}, {ENTITY (9824, "spades")},
{9827, "clubs;"}, {ENTITY (9827, "clubs")},
{9829, "hearts;"}, {ENTITY (9829, "hearts")},
{9830, "diams;"}, {ENTITY (9830, "diams")},
{0, NULL},
}; };
static gchar * static gchar *
@ -377,24 +378,30 @@ unescape_string (const gchar * text)
} }
/* pass xml entities. these will be processed as pango markup */ /* pass xml entities. these will be processed as pango markup */
for (i = 0; XmlEntities[i].escaped; i++) { for (i = 0; i < G_N_ELEMENTS (XmlEntities); i++) {
gssize len = strlen (XmlEntities[i].escaped); const EntityMap *entity = &XmlEntities[i];
if (!g_ascii_strncasecmp (text, XmlEntities[i].escaped, len)) { guint8 escaped_len = entity->escaped_len;
if (!g_ascii_strncasecmp (text, entity->escaped, escaped_len)
&& text[escaped_len] == ';') {
unescaped = g_string_append_c (unescaped, '&'); unescaped = g_string_append_c (unescaped, '&');
unescaped = unescaped =
g_string_append_len (unescaped, XmlEntities[i].escaped, len); g_string_append_len (unescaped, entity->escaped, escaped_len);
text += len; unescaped = g_string_append_c (unescaped, ';');
text += escaped_len + 1;
goto next; goto next;
} }
} }
/* convert html entities */ /* convert html entities */
for (i = 0; HtmlEntities[i].escaped; i++) { for (i = 0; i < G_N_ELEMENTS (HtmlEntities); i++) {
gssize len = strlen (HtmlEntities[i].escaped); const EntityMap *entity = &HtmlEntities[i];
if (!strncmp (text, HtmlEntities[i].escaped, len)) { guint8 escaped_len = entity->escaped_len;
unescaped =
g_string_append_unichar (unescaped, HtmlEntities[i].unescaped); if (!strncmp (text, entity->escaped, escaped_len)
text += len; && text[escaped_len] == ';') {
unescaped = g_string_append_unichar (unescaped, entity->unescaped);
text += escaped_len + 1;
goto next; goto next;
} }
} }