mirror of
https://github.com/wallabag/wallabag.git
synced 2024-11-01 14:49:15 +00:00
413 lines
12 KiB
PHP
413 lines
12 KiB
PHP
<?php
|
||
|
||
/**
|
||
* Description of CharacterEntities
|
||
*
|
||
* @author Sander
|
||
*/
|
||
class CharacterEntities {
|
||
public static function convert($str){
|
||
//Assume the encoding is UTF-8 -> output is UTF-8
|
||
return $str;
|
||
//return utf8_encode($str);
|
||
//Convert to CP1252
|
||
list($from, $to) = CharacterEntities::generateTables();
|
||
return str_replace($from, $to, $str);
|
||
}
|
||
|
||
private static function generateTables(){
|
||
$from = array();
|
||
$to = array();
|
||
|
||
for($i = 0; $i < 256; $i++){
|
||
$from[$i] = $to[$i] = chr($i);
|
||
}
|
||
|
||
$from[0x80] = "€";
|
||
$from[0x82] = "‚";
|
||
$from[0x83] = "ƒ";
|
||
$from[0x84] = "„";
|
||
$from[0x85] = "…";
|
||
$from[0x86] = "†";
|
||
$from[0x87] = "‡";
|
||
$from[0x88] = "ˆ";
|
||
$from[0x89] = "‰";
|
||
$from[0x8A] = "Š";
|
||
$from[0x8B] = "‹";
|
||
$from[0x8C] = "Œ";
|
||
$from[0x8E] = "Ž";
|
||
|
||
$from[0x91] = "‘";
|
||
$from[0x92] = "’";
|
||
$from[0x93] = "“";
|
||
$from[0x94] = "”";
|
||
$from[0x95] = "•";
|
||
$from[0x96] = "–";
|
||
$from[0x97] = "—";
|
||
$from[0x98] = "˜";
|
||
$from[0x99] = "™";
|
||
$from[0x9A] = "š";
|
||
$from[0x9B] = "›";
|
||
$from[0x9C] = "œ";
|
||
$from[0x9E] = "ž";
|
||
$from[0x9F] = "Ÿ";
|
||
|
||
$from[0xA1] = "¡";
|
||
$from[0xA2] = "¢";
|
||
$from[0xA3] = "£";
|
||
$from[0xA4] = "¤";
|
||
$from[0xA5] = "¥";
|
||
$from[0xA6] = "¦";
|
||
$from[0xA7] = "§";
|
||
$from[0xA8] = "¨";
|
||
$from[0xA9] = "©";
|
||
$from[0xAA] = "ª";
|
||
$from[0xAB] = "«";
|
||
$from[0xAC] = "¬";
|
||
$from[0xAE] = "®";
|
||
$from[0xAF] = "¯";
|
||
|
||
$from[0xB0] = "°";
|
||
$from[0xB1] = "±";
|
||
$from[0xB2] = "²";
|
||
$from[0xB3] = "³";
|
||
$from[0xB4] = "´";
|
||
$from[0xB5] = "µ";
|
||
$from[0xB6] = "¶";
|
||
$from[0xB7] = "·";
|
||
$from[0xB8] = "¸";
|
||
$from[0xB9] = "¹";
|
||
$from[0xBA] = "º";
|
||
$from[0xBB] = "»";
|
||
$from[0xBC] = "¼";
|
||
$from[0xBD] = "½";
|
||
$from[0xBE] = "¾";
|
||
$from[0xBF] = "¿";
|
||
|
||
$from[0xC0] = "À";
|
||
$from[0xC1] = "Á";
|
||
$from[0xC2] = "Â";
|
||
$from[0xC3] = "Ã";
|
||
$from[0xC4] = "Ä";
|
||
$from[0xC5] = "Å";
|
||
$from[0xC6] = "Æ";
|
||
$from[0xC7] = "Ç";
|
||
$from[0xC8] = "È";
|
||
$from[0xC9] = "É";
|
||
$from[0xCA] = "Ê";
|
||
$from[0xCB] = "Ë";
|
||
$from[0xCC] = "Ì";
|
||
$from[0xCD] = "Í";
|
||
$from[0xCE] = "Î";
|
||
$from[0xCF] = "Ï";
|
||
|
||
$from[0xD0] = "Ð";
|
||
$from[0xD1] = "Ñ";
|
||
$from[0xD2] = "Ò";
|
||
$from[0xD3] = "Ó";
|
||
$from[0xD4] = "Ô";
|
||
$from[0xD5] = "Õ";
|
||
$from[0xD6] = "Ö";
|
||
$from[0xD7] = "×";
|
||
$from[0xD8] = "Ø";
|
||
$from[0xD9] = "Ù";
|
||
$from[0xDA] = "Ú";
|
||
$from[0xDB] = "Û";
|
||
$from[0xDC] = "Ü";
|
||
$from[0xDD] = "Ý";
|
||
$from[0xDE] = "Þ";
|
||
$from[0xDF] = "ß";
|
||
|
||
$from[0xE0] = "à";
|
||
$from[0xE1] = "á";
|
||
$from[0xE2] = "â";
|
||
$from[0xE3] = "ã";
|
||
$from[0xE4] = "ä";
|
||
$from[0xE5] = "å";
|
||
$from[0xE6] = "æ";
|
||
$from[0xE7] = "ç";
|
||
$from[0xE8] = "è";
|
||
$from[0xE9] = "é";
|
||
$from[0xEA] = "ê";
|
||
$from[0xEB] = "ë";
|
||
$from[0xEC] = "ì";
|
||
$from[0xED] = "í";
|
||
$from[0xEE] = "î";
|
||
$from[0xEF] = "ï";
|
||
|
||
$from[0xF0] = "ð";
|
||
$from[0xF1] = "ñ";
|
||
$from[0xF2] = "ò";
|
||
$from[0xF3] = "ó";
|
||
$from[0xF4] = "ô";
|
||
$from[0xF5] = "õ";
|
||
$from[0xF6] = "ö";
|
||
$from[0xF7] = "÷";
|
||
$from[0xF8] = "ø";
|
||
$from[0xF9] = "ù";
|
||
$from[0xFA] = "ú";
|
||
$from[0xFB] = "û";
|
||
$from[0xFC] = "ü";
|
||
$from[0xFD] = "ý";
|
||
$from[0xFE] = "þ";
|
||
$from[0xFF] = "ÿ";
|
||
|
||
|
||
return array($from, $to);
|
||
}
|
||
/*
|
||
00 = U+0000 : NULL
|
||
01 = U+0001 : START OF HEADING
|
||
02 = U+0002 : START OF TEXT
|
||
03 = U+0003 : END OF TEXT
|
||
04 = U+0004 : END OF TRANSMISSION
|
||
05 = U+0005 : ENQUIRY
|
||
06 = U+0006 : ACKNOWLEDGE
|
||
07 = U+0007 : BELL
|
||
08 = U+0008 : BACKSPACE
|
||
09 = U+0009 : HORIZONTAL TABULATION
|
||
0A = U+000A : LINE FEED
|
||
0B = U+000B : VERTICAL TABULATION
|
||
0C = U+000C : FORM FEED
|
||
0D = U+000D : CARRIAGE RETURN
|
||
0E = U+000E : SHIFT OUT
|
||
0F = U+000F : SHIFT IN
|
||
10 = U+0010 : DATA LINK ESCAPE
|
||
11 = U+0011 : DEVICE CONTROL ONE
|
||
12 = U+0012 : DEVICE CONTROL TWO
|
||
13 = U+0013 : DEVICE CONTROL THREE
|
||
14 = U+0014 : DEVICE CONTROL FOUR
|
||
15 = U+0015 : NEGATIVE ACKNOWLEDGE
|
||
16 = U+0016 : SYNCHRONOUS IDLE
|
||
17 = U+0017 : END OF TRANSMISSION BLOCK
|
||
18 = U+0018 : CANCEL
|
||
19 = U+0019 : END OF MEDIUM
|
||
1A = U+001A : SUBSTITUTE
|
||
1B = U+001B : ESCAPE
|
||
1C = U+001C : FILE SEPARATOR
|
||
1D = U+001D : GROUP SEPARATOR
|
||
1E = U+001E : RECORD SEPARATOR
|
||
1F = U+001F : UNIT SEPARATOR
|
||
20 = U+0020 : SPACE
|
||
21 = U+0021 : EXCLAMATION MARK
|
||
22 = U+0022 : QUOTATION MARK
|
||
23 = U+0023 : NUMBER SIGN
|
||
24 = U+0024 : DOLLAR SIGN
|
||
25 = U+0025 : PERCENT SIGN
|
||
26 = U+0026 : AMPERSAND
|
||
27 = U+0027 : APOSTROPHE
|
||
28 = U+0028 : LEFT PARENTHESIS
|
||
29 = U+0029 : RIGHT PARENTHESIS
|
||
2A = U+002A : ASTERISK
|
||
2B = U+002B : PLUS SIGN
|
||
2C = U+002C : COMMA
|
||
2D = U+002D : HYPHEN-MINUS
|
||
2E = U+002E : FULL STOP
|
||
2F = U+002F : SOLIDUS
|
||
30 = U+0030 : DIGIT ZERO
|
||
31 = U+0031 : DIGIT ONE
|
||
32 = U+0032 : DIGIT TWO
|
||
33 = U+0033 : DIGIT THREE
|
||
34 = U+0034 : DIGIT FOUR
|
||
35 = U+0035 : DIGIT FIVE
|
||
36 = U+0036 : DIGIT SIX
|
||
37 = U+0037 : DIGIT SEVEN
|
||
38 = U+0038 : DIGIT EIGHT
|
||
39 = U+0039 : DIGIT NINE
|
||
3A = U+003A : COLON
|
||
3B = U+003B : SEMICOLON
|
||
3C = U+003C : LESS-THAN SIGN
|
||
3D = U+003D : EQUALS SIGN
|
||
3E = U+003E : GREATER-THAN SIGN
|
||
3F = U+003F : QUESTION MARK
|
||
40 = U+0040 : COMMERCIAL AT
|
||
41 = U+0041 : LATIN CAPITAL LETTER A
|
||
42 = U+0042 : LATIN CAPITAL LETTER B
|
||
43 = U+0043 : LATIN CAPITAL LETTER C
|
||
44 = U+0044 : LATIN CAPITAL LETTER D
|
||
45 = U+0045 : LATIN CAPITAL LETTER E
|
||
46 = U+0046 : LATIN CAPITAL LETTER F
|
||
47 = U+0047 : LATIN CAPITAL LETTER G
|
||
48 = U+0048 : LATIN CAPITAL LETTER H
|
||
49 = U+0049 : LATIN CAPITAL LETTER I
|
||
4A = U+004A : LATIN CAPITAL LETTER J
|
||
4B = U+004B : LATIN CAPITAL LETTER K
|
||
4C = U+004C : LATIN CAPITAL LETTER L
|
||
4D = U+004D : LATIN CAPITAL LETTER M
|
||
4E = U+004E : LATIN CAPITAL LETTER N
|
||
4F = U+004F : LATIN CAPITAL LETTER O
|
||
50 = U+0050 : LATIN CAPITAL LETTER P
|
||
51 = U+0051 : LATIN CAPITAL LETTER Q
|
||
52 = U+0052 : LATIN CAPITAL LETTER R
|
||
53 = U+0053 : LATIN CAPITAL LETTER S
|
||
54 = U+0054 : LATIN CAPITAL LETTER T
|
||
55 = U+0055 : LATIN CAPITAL LETTER U
|
||
56 = U+0056 : LATIN CAPITAL LETTER V
|
||
57 = U+0057 : LATIN CAPITAL LETTER W
|
||
58 = U+0058 : LATIN CAPITAL LETTER X
|
||
59 = U+0059 : LATIN CAPITAL LETTER Y
|
||
5A = U+005A : LATIN CAPITAL LETTER Z
|
||
5B = U+005B : LEFT SQUARE BRACKET
|
||
5C = U+005C : REVERSE SOLIDUS
|
||
5D = U+005D : RIGHT SQUARE BRACKET
|
||
5E = U+005E : CIRCUMFLEX ACCENT
|
||
5F = U+005F : LOW LINE
|
||
60 = U+0060 : GRAVE ACCENT
|
||
61 = U+0061 : LATIN SMALL LETTER A
|
||
62 = U+0062 : LATIN SMALL LETTER B
|
||
63 = U+0063 : LATIN SMALL LETTER C
|
||
64 = U+0064 : LATIN SMALL LETTER D
|
||
65 = U+0065 : LATIN SMALL LETTER E
|
||
66 = U+0066 : LATIN SMALL LETTER F
|
||
67 = U+0067 : LATIN SMALL LETTER G
|
||
68 = U+0068 : LATIN SMALL LETTER H
|
||
69 = U+0069 : LATIN SMALL LETTER I
|
||
6A = U+006A : LATIN SMALL LETTER J
|
||
6B = U+006B : LATIN SMALL LETTER K
|
||
6C = U+006C : LATIN SMALL LETTER L
|
||
6D = U+006D : LATIN SMALL LETTER M
|
||
6E = U+006E : LATIN SMALL LETTER N
|
||
6F = U+006F : LATIN SMALL LETTER O
|
||
70 = U+0070 : LATIN SMALL LETTER P
|
||
71 = U+0071 : LATIN SMALL LETTER Q
|
||
72 = U+0072 : LATIN SMALL LETTER R
|
||
73 = U+0073 : LATIN SMALL LETTER S
|
||
74 = U+0074 : LATIN SMALL LETTER T
|
||
75 = U+0075 : LATIN SMALL LETTER U
|
||
76 = U+0076 : LATIN SMALL LETTER V
|
||
77 = U+0077 : LATIN SMALL LETTER W
|
||
78 = U+0078 : LATIN SMALL LETTER X
|
||
79 = U+0079 : LATIN SMALL LETTER Y
|
||
7A = U+007A : LATIN SMALL LETTER Z
|
||
7B = U+007B : LEFT CURLY BRACKET
|
||
7C = U+007C : VERTICAL LINE
|
||
7D = U+007D : RIGHT CURLY BRACKET
|
||
7E = U+007E : TILDE
|
||
7F = U+007F : DELETE
|
||
80 = U+20AC : EURO SIGN
|
||
82 = U+201A : SINGLE LOW-9 QUOTATION MARK
|
||
83 = U+0192 : LATIN SMALL LETTER F WITH HOOK
|
||
84 = U+201E : DOUBLE LOW-9 QUOTATION MARK
|
||
85 = U+2026 : HORIZONTAL ELLIPSIS
|
||
86 = U+2020 : DAGGER
|
||
87 = U+2021 : DOUBLE DAGGER
|
||
88 = U+02C6 : MODIFIER LETTER CIRCUMFLEX ACCENT
|
||
89 = U+2030 : PER MILLE SIGN
|
||
8A = U+0160 : LATIN CAPITAL LETTER S WITH CARON
|
||
8B = U+2039 : SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||
8C = U+0152 : LATIN CAPITAL LIGATURE OE
|
||
8E = U+017D : LATIN CAPITAL LETTER Z WITH CARON
|
||
91 = U+2018 : LEFT SINGLE QUOTATION MARK
|
||
92 = U+2019 : RIGHT SINGLE QUOTATION MARK
|
||
93 = U+201C : LEFT DOUBLE QUOTATION MARK
|
||
94 = U+201D : RIGHT DOUBLE QUOTATION MARK
|
||
95 = U+2022 : BULLET
|
||
96 = U+2013 : EN DASH
|
||
97 = U+2014 : EM DASH
|
||
98 = U+02DC : SMALL TILDE
|
||
99 = U+2122 : TRADE MARK SIGN
|
||
9A = U+0161 : LATIN SMALL LETTER S WITH CARON
|
||
9B = U+203A : SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||
9C = U+0153 : LATIN SMALL LIGATURE OE
|
||
9E = U+017E : LATIN SMALL LETTER Z WITH CARON
|
||
9F = U+0178 : LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||
A0 = U+00A0 : NO-BREAK SPACE
|
||
A1 = U+00A1 : INVERTED EXCLAMATION MARK
|
||
A2 = U+00A2 : CENT SIGN
|
||
A3 = U+00A3 : POUND SIGN
|
||
A4 = U+00A4 : CURRENCY SIGN
|
||
A5 = U+00A5 : YEN SIGN
|
||
A6 = U+00A6 : BROKEN BAR
|
||
A7 = U+00A7 : SECTION SIGN
|
||
A8 = U+00A8 : DIAERESIS
|
||
A9 = U+00A9 : COPYRIGHT SIGN
|
||
AA = U+00AA : FEMININE ORDINAL INDICATOR
|
||
AB = U+00AB : LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||
AC = U+00AC : NOT SIGN
|
||
AD = U+00AD : SOFT HYPHEN
|
||
AE = U+00AE : REGISTERED SIGN
|
||
AF = U+00AF : MACRON
|
||
B0 = U+00B0 : DEGREE SIGN
|
||
B1 = U+00B1 : PLUS-MINUS SIGN
|
||
B2 = U+00B2 : SUPERSCRIPT TWO
|
||
B3 = U+00B3 : SUPERSCRIPT THREE
|
||
B4 = U+00B4 : ACUTE ACCENT
|
||
B5 = U+00B5 : MICRO SIGN
|
||
B6 = U+00B6 : PILCROW SIGN
|
||
B7 = U+00B7 : MIDDLE DOT
|
||
B8 = U+00B8 : CEDILLA
|
||
B9 = U+00B9 : SUPERSCRIPT ONE
|
||
BA = U+00BA : MASCULINE ORDINAL INDICATOR
|
||
BB = U+00BB : RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||
BC = U+00BC : VULGAR FRACTION ONE QUARTER
|
||
BD = U+00BD : VULGAR FRACTION ONE HALF
|
||
BE = U+00BE : VULGAR FRACTION THREE QUARTERS
|
||
BF = U+00BF : INVERTED QUESTION MARK
|
||
C0 = U+00C0 : LATIN CAPITAL LETTER A WITH GRAVE
|
||
C1 = U+00C1 : LATIN CAPITAL LETTER A WITH ACUTE
|
||
C2 = U+00C2 : LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||
C3 = U+00C3 : LATIN CAPITAL LETTER A WITH TILDE
|
||
C4 = U+00C4 : LATIN CAPITAL LETTER A WITH DIAERESIS
|
||
C5 = U+00C5 : LATIN CAPITAL LETTER A WITH RING ABOVE
|
||
C6 = U+00C6 : LATIN CAPITAL LETTER AE
|
||
C7 = U+00C7 : LATIN CAPITAL LETTER C WITH CEDILLA
|
||
C8 = U+00C8 : LATIN CAPITAL LETTER E WITH GRAVE
|
||
C9 = U+00C9 : LATIN CAPITAL LETTER E WITH ACUTE
|
||
CA = U+00CA : LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||
CB = U+00CB : LATIN CAPITAL LETTER E WITH DIAERESIS
|
||
CC = U+00CC : LATIN CAPITAL LETTER I WITH GRAVE
|
||
CD = U+00CD : LATIN CAPITAL LETTER I WITH ACUTE
|
||
CE = U+00CE : LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||
CF = U+00CF : LATIN CAPITAL LETTER I WITH DIAERESIS
|
||
D0 = U+00D0 : LATIN CAPITAL LETTER ETH
|
||
D1 = U+00D1 : LATIN CAPITAL LETTER N WITH TILDE
|
||
D2 = U+00D2 : LATIN CAPITAL LETTER O WITH GRAVE
|
||
D3 = U+00D3 : LATIN CAPITAL LETTER O WITH ACUTE
|
||
D4 = U+00D4 : LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||
D5 = U+00D5 : LATIN CAPITAL LETTER O WITH TILDE
|
||
D6 = U+00D6 : LATIN CAPITAL LETTER O WITH DIAERESIS
|
||
D7 = U+00D7 : MULTIPLICATION SIGN
|
||
D8 = U+00D8 : LATIN CAPITAL LETTER O WITH STROKE
|
||
D9 = U+00D9 : LATIN CAPITAL LETTER U WITH GRAVE
|
||
DA = U+00DA : LATIN CAPITAL LETTER U WITH ACUTE
|
||
DB = U+00DB : LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||
DC = U+00DC : LATIN CAPITAL LETTER U WITH DIAERESIS
|
||
DD = U+00DD : LATIN CAPITAL LETTER Y WITH ACUTE
|
||
DE = U+00DE : LATIN CAPITAL LETTER THORN
|
||
DF = U+00DF : LATIN SMALL LETTER SHARP S
|
||
E0 = U+00E0 : LATIN SMALL LETTER A WITH GRAVE
|
||
E1 = U+00E1 : LATIN SMALL LETTER A WITH ACUTE
|
||
E2 = U+00E2 : LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||
E3 = U+00E3 : LATIN SMALL LETTER A WITH TILDE
|
||
E4 = U+00E4 : LATIN SMALL LETTER A WITH DIAERESIS
|
||
E5 = U+00E5 : LATIN SMALL LETTER A WITH RING ABOVE
|
||
E6 = U+00E6 : LATIN SMALL LETTER AE
|
||
E7 = U+00E7 : LATIN SMALL LETTER C WITH CEDILLA
|
||
E8 = U+00E8 : LATIN SMALL LETTER E WITH GRAVE
|
||
E9 = U+00E9 : LATIN SMALL LETTER E WITH ACUTE
|
||
EA = U+00EA : LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||
EB = U+00EB : LATIN SMALL LETTER E WITH DIAERESIS
|
||
EC = U+00EC : LATIN SMALL LETTER I WITH GRAVE
|
||
ED = U+00ED : LATIN SMALL LETTER I WITH ACUTE
|
||
EE = U+00EE : LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||
EF = U+00EF : LATIN SMALL LETTER I WITH DIAERESIS
|
||
F0 = U+00F0 : LATIN SMALL LETTER ETH
|
||
F1 = U+00F1 : LATIN SMALL LETTER N WITH TILDE
|
||
F2 = U+00F2 : LATIN SMALL LETTER O WITH GRAVE
|
||
F3 = U+00F3 : LATIN SMALL LETTER O WITH ACUTE
|
||
F4 = U+00F4 : LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||
F5 = U+00F5 : LATIN SMALL LETTER O WITH TILDE
|
||
F6 = U+00F6 : LATIN SMALL LETTER O WITH DIAERESIS
|
||
F7 = U+00F7 : DIVISION SIGN
|
||
F8 = U+00F8 : LATIN SMALL LETTER O WITH STROKE
|
||
F9 = U+00F9 : LATIN SMALL LETTER U WITH GRAVE
|
||
FA = U+00FA : LATIN SMALL LETTER U WITH ACUTE
|
||
FB = U+00FB : LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||
FC = U+00FC : LATIN SMALL LETTER U WITH DIAERESIS
|
||
FD = U+00FD : LATIN SMALL LETTER Y WITH ACUTE
|
||
FE = U+00FE : LATIN SMALL LETTER THORN
|
||
FF = U+00FF : LATIN SMALL LETTER Y WITH DIAERESIS
|
||
*
|
||
*/
|
||
}
|
||
?>
|