/* 2014-11-19 00:25:21 +08:00 */

#include "lib_acl.h"
/*
 * Source table for the generator in create_map2(): each entry pairs a
 * character code with the HTML entity text that represents it.
 *
 * The array is deliberately not const: create_map2() clears the `entity`
 * pointer of an entry when a later entry carries the same entity string.
 * That is the case for code 32, which shares "&nbsp;" with code 160.
 *
 * A code may appear twice with different spellings (named and numeric
 * form, e.g. "&quot;" and "&#34;"); both entries are kept.
 *
 * NOTE(review): entity names follow the HTML 4 character entity list;
 * the spellings chosen for 32 ("&nbsp;") and 39 ("&#39;") should be
 * confirmed against the upstream table.
 */
static struct {
	unsigned short id;	/* character code */
	const char *entity;	/* entity text, or NULL once de-duplicated */
} html_charset[] = {
	{ 32, "&nbsp;" }, { 34, "&quot;" }, { 34, "&#34;" }, { 38, "&amp;" },
	{ 38, "&#38;" }, { 39, "&#39;" }, { 60, "&lt;" }, { 60, "&#60;" },
	{ 62, "&gt;" }, { 62, "&#62;" }, { 160, "&nbsp;" }, { 161, "&iexcl;" },
	{ 162, "&cent;" }, { 163, "&pound;" }, { 164, "&curren;" }, { 165, "&yen;" },
	{ 166, "&brvbar;" }, { 167, "&sect;" }, { 168, "&uml;" }, { 169, "&copy;" },
	{ 170, "&ordf;" }, { 171, "&laquo;" }, { 172, "&not;" }, { 173, "&shy;" },
	{ 174, "&reg;" }, { 175, "&macr;" }, { 176, "&deg;" }, { 177, "&plusmn;" },
	{ 178, "&sup2;" }, { 179, "&sup3;" }, { 180, "&acute;" }, { 181, "&micro;" },
	{ 182, "&para;" }, { 183, "&middot;" }, { 184, "&cedil;" }, { 185, "&sup1;" },
	{ 186, "&ordm;" }, { 187, "&raquo;" }, { 188, "&frac14;" }, { 189, "&frac12;" },
	{ 190, "&frac34;" }, { 191, "&iquest;" }, { 192, "&Agrave;" }, { 193, "&Aacute;" },
	{ 194, "&Acirc;" }, { 195, "&Atilde;" }, { 196, "&Auml;" }, { 197, "&Aring;" },
	{ 198, "&AElig;" }, { 199, "&Ccedil;" }, { 200, "&Egrave;" }, { 201, "&Eacute;" },
	{ 202, "&Ecirc;" }, { 203, "&Euml;" }, { 204, "&Igrave;" }, { 205, "&Iacute;" },
	{ 206, "&Icirc;" }, { 207, "&Iuml;" }, { 208, "&ETH;" }, { 209, "&Ntilde;" },
	{ 210, "&Ograve;" }, { 211, "&Oacute;" }, { 212, "&Ocirc;" }, { 213, "&Otilde;" },
	{ 214, "&Ouml;" }, { 215, "&times;" }, { 216, "&Oslash;" }, { 217, "&Ugrave;" },
	{ 218, "&Uacute;" }, { 219, "&Ucirc;" }, { 220, "&Uuml;" }, { 221, "&Yacute;" },
	{ 222, "&THORN;" }, { 223, "&szlig;" }, { 224, "&agrave;" }, { 225, "&aacute;" },
	{ 226, "&acirc;" }, { 227, "&atilde;" }, { 228, "&auml;" }, { 229, "&aring;" },
	{ 230, "&aelig;" }, { 231, "&ccedil;" }, { 232, "&egrave;" }, { 233, "&eacute;" },
	{ 234, "&ecirc;" }, { 235, "&euml;" }, { 236, "&igrave;" }, { 237, "&iacute;" },
	{ 238, "&icirc;" }, { 239, "&iuml;" }, { 240, "&eth;" }, { 241, "&ntilde;" },
	{ 242, "&ograve;" }, { 243, "&oacute;" }, { 244, "&ocirc;" }, { 245, "&otilde;" },
	{ 246, "&ouml;" }, { 247, "&divide;" }, { 248, "&oslash;" }, { 249, "&ugrave;" },
	{ 250, "&uacute;" }, { 251, "&ucirc;" }, { 252, "&uuml;" }, { 253, "&yacute;" },
	{ 254, "&thorn;" }, { 255, "&yuml;" }, { 338, "&OElig;" }, { 339, "&oelig;" },
	{ 352, "&Scaron;" }, { 353, "&scaron;" }, { 376, "&Yuml;" }, { 402, "&fnof;" },
	{ 710, "&circ;" }, { 732, "&tilde;" }, { 913, "&Alpha;" }, { 914, "&Beta;" },
	{ 915, "&Gamma;" }, { 916, "&Delta;" }, { 917, "&Epsilon;" }, { 918, "&Zeta;" },
	{ 919, "&Eta;" }, { 920, "&Theta;" }, { 921, "&Iota;" }, { 922, "&Kappa;" },
	{ 923, "&Lambda;" }, { 924, "&Mu;" }, { 925, "&Nu;" }, { 926, "&Xi;" },
	{ 927, "&Omicron;" }, { 928, "&Pi;" }, { 929, "&Rho;" }, { 931, "&Sigma;" },
	{ 932, "&Tau;" }, { 933, "&Upsilon;" }, { 934, "&Phi;" }, { 935, "&Chi;" },
	{ 936, "&Psi;" }, { 937, "&Omega;" }, { 945, "&alpha;" }, { 946, "&beta;" },
	{ 947, "&gamma;" }, { 948, "&delta;" }, { 949, "&epsilon;" }, { 950, "&zeta;" },
	{ 951, "&eta;" }, { 952, "&theta;" }, { 953, "&iota;" }, { 954, "&kappa;" },
	{ 955, "&lambda;" }, { 956, "&mu;" }, { 957, "&nu;" }, { 958, "&xi;" },
	{ 959, "&omicron;" }, { 960, "&pi;" }, { 961, "&rho;" }, { 962, "&sigmaf;" },
	{ 963, "&sigma;" }, { 964, "&tau;" }, { 965, "&upsilon;" }, { 966, "&phi;" },
	{ 967, "&chi;" }, { 968, "&psi;" }, { 969, "&omega;" }, { 977, "&thetasym;" },
	{ 978, "&upsih;" }, { 982, "&piv;" }, { 8194, "&ensp;" }, { 8195, "&emsp;" },
	{ 8201, "&thinsp;" }, { 8204, "&zwnj;" }, { 8205, "&zwj;" }, { 8206, "&lrm;" },
	{ 8207, "&rlm;" }, { 8211, "&ndash;" }, { 8212, "&mdash;" }, { 8216, "&lsquo;" },
	{ 8217, "&rsquo;" }, { 8218, "&sbquo;" }, { 8220, "&ldquo;" }, { 8221, "&rdquo;" },
	{ 8222, "&bdquo;" }, { 8224, "&dagger;" }, { 8225, "&Dagger;" }, { 8226, "&bull;" },
	{ 8230, "&hellip;" }, { 8240, "&permil;" }, { 8242, "&prime;" }, { 8243, "&Prime;" },
	{ 8249, "&lsaquo;" }, { 8250, "&rsaquo;" }, { 8254, "&oline;" }, { 8260, "&frasl;" },
	{ 8364, "&euro;" }, { 8465, "&image;" }, { 8472, "&weierp;" }, { 8476, "&real;" },
	{ 8482, "&trade;" }, { 8501, "&alefsym;" }, { 8592, "&larr;" }, { 8593, "&uarr;" },
	{ 8594, "&rarr;" }, { 8595, "&darr;" }, { 8596, "&harr;" }, { 8629, "&crarr;" },
	{ 8656, "&lArr;" }, { 8657, "&uArr;" }, { 8658, "&rArr;" }, { 8659, "&dArr;" },
	{ 8660, "&hArr;" }, { 8704, "&forall;" }, { 8706, "&part;" }, { 8707, "&exist;" },
	{ 8709, "&empty;" }, { 8711, "&nabla;" }, { 8712, "&isin;" }, { 8713, "&notin;" },
	{ 8715, "&ni;" }, { 8719, "&prod;" }, { 8721, "&sum;" }, { 8722, "&minus;" },
	{ 8727, "&lowast;" }, { 8730, "&radic;" }, { 8733, "&prop;" }, { 8734, "&infin;" },
	{ 8736, "&ang;" }, { 8743, "&and;" }, { 8744, "&or;" }, { 8745, "&cap;" },
	{ 8746, "&cup;" }, { 8747, "&int;" }, { 8756, "&there4;" }, { 8764, "&sim;" },
	{ 8773, "&cong;" }, { 8776, "&asymp;" }, { 8800, "&ne;" }, { 8801, "&equiv;" },
	{ 8804, "&le;" }, { 8805, "&ge;" }, { 8834, "&sub;" }, { 8835, "&sup;" },
	{ 8836, "&nsub;" }, { 8838, "&sube;" }, { 8839, "&supe;" }, { 8853, "&oplus;" },
	{ 8855, "&otimes;" }, { 8869, "&perp;" }, { 8901, "&sdot;" }, { 8968, "&lceil;" },
	{ 8969, "&rceil;" }, { 8970, "&lfloor;" }, { 8971, "&rfloor;" }, { 9001, "&lang;" },
	{ 9002, "&rang;" }, { 9674, "&loz;" }, { 9824, "&spades;" }, { 9827, "&clubs;" },
	{ 9829, "&hearts;" }, { 9830, "&diams;" }
};
static void test_map2(void)
|
|
|
|
{
|
|
|
|
unsigned int i, n;
|
|
|
|
unsigned short k;
|
|
|
|
unsigned char h, l;
|
|
|
|
|
|
|
|
n = sizeof(html_charset) / sizeof(html_charset[0]);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
k = html_charset[i].id;
|
|
|
|
h = (unsigned char) (k >> 8);
|
|
|
|
l = (unsigned char) (k & 0xFF) ;
|
|
|
|
if (k > 255 && (h == 0 || l == 0))
|
|
|
|
printf("id: %d = 0\n", html_charset[i].id);
|
|
|
|
}
|
|
|
|
}
static void create_map2(void)
|
|
|
|
{
|
|
|
|
unsigned int i, j, n;
|
|
|
|
|
|
|
|
n = sizeof(html_charset) / sizeof(html_charset[0]);
|
|
|
|
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
const char *ptr = html_charset[i].entity;
|
|
|
|
if (ptr == NULL)
|
|
|
|
continue;
|
|
|
|
for (j = 0; j < i; j++) {
|
|
|
|
if (html_charset[j].entity == NULL)
|
|
|
|
continue;
|
|
|
|
if (strcmp(ptr, html_charset[j].entity) == 0) {
|
|
|
|
html_charset[j].entity = NULL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
printf("#ifndef __HTML_CHARSET_INCLUDE_H__\n");
|
|
|
|
printf("#define __HTML_CHARSET_INCLUDE_H__\n");
|
|
|
|
printf("\n");
|
|
|
|
|
|
|
|
printf("static const char *html_charmap[] = {\n");
|
|
|
|
for (i = 0; i < 65535; i++) {
|
|
|
|
if (i > 0) {
|
|
|
|
if (i % 8 == 0)
|
|
|
|
printf(",\n\t");
|
|
|
|
else
|
|
|
|
printf(", ");
|
|
|
|
} else
|
|
|
|
printf("\t");
|
|
|
|
for (j = 0; j < n; j++) {
|
|
|
|
if (html_charset[j].entity && html_charset[j].id == i)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (j < n)
|
|
|
|
printf("\"%s\"", html_charset[j].entity);
|
|
|
|
else
|
|
|
|
printf("NULL");
|
|
|
|
}
|
|
|
|
printf("\n};\n");
|
|
|
|
|
|
|
|
printf("\ntypedef struct {\n"
|
|
|
|
"\tunsigned short ch;\n"
|
|
|
|
"\tconst char *txt;\n"
|
|
|
|
"\tsize_t len;\n"
|
|
|
|
"} HTML_SPEC;\n"
|
|
|
|
"\n"
|
|
|
|
"static const HTML_SPEC html_tab[] = {\n"
|
|
|
|
);
|
|
|
|
for (i = 0; i < n; i++) {
|
|
|
|
if (html_charset[i].entity == NULL)
|
|
|
|
continue;
|
|
|
|
printf("\t{ %u, \"%s\", sizeof(\"%s\") - 1 },\n",
|
|
|
|
html_charset[i].id, html_charset[i].entity, html_charset[i].entity);
|
|
|
|
}
|
|
|
|
printf("};\n\n");
|
|
|
|
printf("#endif\n");
|
|
|
|
}

/*----------------------------------------------------------------------------*/

/* One row of the single-byte escape table: a character and its HTML escape. */
typedef struct HTML_SPEC_TBL
{
	const char ch;		/* character to escape; 0 marks the end of the table */
	const char *txt;	/* replacement text for `ch` */
} HTML_SPEC_TBL;

/*
 * Characters that create_map1() emits an escape for. The table ends with
 * a { 0, NULL } sentinel, which is what the lookup loop stops on.
 *
 * NOTE(review): "&#39;" is used for the apostrophe; confirm against the
 * upstream table.
 */
static const HTML_SPEC_TBL __tab[] = {
	{ '<', "&lt;" },
	{ '>', "&gt;" },
	{ '&', "&amp;" },
	{ '\'', "&#39;" },
	{ '\"', "&quot;" },
	{ 0, NULL }
};
static void create_map1(void)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
|
|
|
|
for (i = 0; i < 255; i++) {
|
|
|
|
if (i > 0) {
|
|
|
|
if (i % 8 == 0)
|
|
|
|
printf(",\r\n");
|
|
|
|
else
|
|
|
|
printf(", ");
|
|
|
|
}
|
|
|
|
for (j = 0; __tab[j].ch; j++) {
|
|
|
|
if (__tab[j].ch == i)
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (__tab[j].ch)
|
|
|
|
printf("\"%s\"", __tab[j].txt);
|
|
|
|
else
|
|
|
|
printf("NULL");
|
|
|
|
}
|
|
|
|
printf("\r\n");
|
|
|
|
}
/*
 * Entry point. Exactly one of four actions runs, chosen by the
 * compile-time constant `mode`:
 *   RUN_CREATE_MAP1 - print the byte-indexed escape table;
 *   RUN_CREATE_MAP2 - print the generated html_charset header (selected);
 *   RUN_TEST_MAP2   - run the html_charset sanity check;
 *   RUN_URL_DECODE  - decode a sample percent-encoded string and print it.
 *
 * Always returns 0.
 */
int main(void)
{
	enum {
		RUN_CREATE_MAP1,
		RUN_CREATE_MAP2,
		RUN_TEST_MAP2,
		RUN_URL_DECODE
	};
	/* Change this constant to pick another action. */
	const int mode = RUN_CREATE_MAP2;

	const char *src = "%E4%B8%AD%E5%9B%BD%0A%E4%B8%AD%E5%9B%BD%0A%E4%B8%AD%E5%9B%BD";
	char *dst;

	switch (mode) {
	case RUN_CREATE_MAP1:
		create_map1();
		return (0);
	case RUN_CREATE_MAP2:
		create_map2();
		return (0);
	case RUN_TEST_MAP2:
		test_map2();
		return (0);
	default:
		break;
	}

	printf("src(%s)\n", src);
	dst = acl_url_decode(src);
	/* Passing NULL to %s is undefined, so substitute a placeholder. */
	printf("dst(%s)\n", dst != NULL ? dst : "(null)");

	/*
	 * A UTF-8 to GB2312 conversion check based on acl_utf8_to_gb2312()
	 * used to follow here; it is disabled.
	 *
	 * NOTE(review): dst is not released before exit - presumably the
	 * caller owns the buffer returned by acl_url_decode(); confirm.
	 */
	return (0);
}