#include "lib_acl.h"

#include <stdio.h>
#include <string.h>

/*
 * Small generator/diagnostic tool for HTML escape tables.
 *
 * Modes (first command-line argument, default "map2"):
 *   map1  print a 256-slot initializer list for the five markup characters
 *   map2  print a C header with html_charmap[] and html_tab[]
 *   test  report table ids above 255 that contain a zero byte
 *   url   run the acl_url_decode() demo
 */

#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

enum {
	CHARMAP_SLOTS = 65536,	/* one slot for every unsigned short value */
	SPECMAP_SLOTS = 256	/* one slot for every byte value */
};

/*
 * Code point -> HTML entity reference, following the HTML 4.01 entity list.
 *
 * The strings must be the entity references themselves ("&lt;"), never the
 * characters they stand for: a raw '"' does not even form a valid C string
 * literal, and the generated tables would map every character to itself.
 *
 * NOTE(review): the spellings of the entries for 32 and 39 and of the
 * duplicated entries for 34, 38, 60 and 62 are assumptions (named form
 * first, numeric form second) -- confirm against the upstream table.
 * The entry for 32 shares its string with 160, so create_map2() drops it
 * when it removes duplicate strings.
 *
 * The table is not const because create_map2() clears duplicate entries.
 */
static struct {
	unsigned short id;
	const char *entity;
} html_charset[] = {
	{ 32, "&nbsp;" },
	{ 34, "&quot;" }, { 34, "&#34;" },
	{ 38, "&amp;" }, { 38, "&#38;" },
	{ 39, "&#39;" },	/* HTML 4.01 defines no &apos; */
	{ 60, "&lt;" }, { 60, "&#60;" },
	{ 62, "&gt;" }, { 62, "&#62;" },

	/* Latin-1 */
	{ 160, "&nbsp;" }, { 161, "&iexcl;" }, { 162, "&cent;" }, { 163, "&pound;" },
	{ 164, "&curren;" }, { 165, "&yen;" }, { 166, "&brvbar;" }, { 167, "&sect;" },
	{ 168, "&uml;" }, { 169, "&copy;" }, { 170, "&ordf;" }, { 171, "&laquo;" },
	{ 172, "&not;" }, { 173, "&shy;" }, { 174, "&reg;" }, { 175, "&macr;" },
	{ 176, "&deg;" }, { 177, "&plusmn;" }, { 178, "&sup2;" }, { 179, "&sup3;" },
	{ 180, "&acute;" }, { 181, "&micro;" }, { 182, "&para;" }, { 183, "&middot;" },
	{ 184, "&cedil;" }, { 185, "&sup1;" }, { 186, "&ordm;" }, { 187, "&raquo;" },
	{ 188, "&frac14;" }, { 189, "&frac12;" }, { 190, "&frac34;" }, { 191, "&iquest;" },
	{ 192, "&Agrave;" }, { 193, "&Aacute;" }, { 194, "&Acirc;" }, { 195, "&Atilde;" },
	{ 196, "&Auml;" }, { 197, "&Aring;" }, { 198, "&AElig;" }, { 199, "&Ccedil;" },
	{ 200, "&Egrave;" }, { 201, "&Eacute;" }, { 202, "&Ecirc;" }, { 203, "&Euml;" },
	{ 204, "&Igrave;" }, { 205, "&Iacute;" }, { 206, "&Icirc;" }, { 207, "&Iuml;" },
	{ 208, "&ETH;" }, { 209, "&Ntilde;" }, { 210, "&Ograve;" }, { 211, "&Oacute;" },
	{ 212, "&Ocirc;" }, { 213, "&Otilde;" }, { 214, "&Ouml;" }, { 215, "&times;" },
	{ 216, "&Oslash;" }, { 217, "&Ugrave;" }, { 218, "&Uacute;" }, { 219, "&Ucirc;" },
	{ 220, "&Uuml;" }, { 221, "&Yacute;" }, { 222, "&THORN;" }, { 223, "&szlig;" },
	{ 224, "&agrave;" }, { 225, "&aacute;" }, { 226, "&acirc;" }, { 227, "&atilde;" },
	{ 228, "&auml;" }, { 229, "&aring;" }, { 230, "&aelig;" }, { 231, "&ccedil;" },
	{ 232, "&egrave;" }, { 233, "&eacute;" }, { 234, "&ecirc;" }, { 235, "&euml;" },
	{ 236, "&igrave;" }, { 237, "&iacute;" }, { 238, "&icirc;" }, { 239, "&iuml;" },
	{ 240, "&eth;" }, { 241, "&ntilde;" }, { 242, "&ograve;" }, { 243, "&oacute;" },
	{ 244, "&ocirc;" }, { 245, "&otilde;" }, { 246, "&ouml;" }, { 247, "&divide;" },
	{ 248, "&oslash;" }, { 249, "&ugrave;" }, { 250, "&uacute;" }, { 251, "&ucirc;" },
	{ 252, "&uuml;" }, { 253, "&yacute;" }, { 254, "&thorn;" }, { 255, "&yuml;" },

	/* Latin Extended, spacing modifiers */
	{ 338, "&OElig;" }, { 339, "&oelig;" }, { 352, "&Scaron;" }, { 353, "&scaron;" },
	{ 376, "&Yuml;" }, { 402, "&fnof;" }, { 710, "&circ;" }, { 732, "&tilde;" },

	/* Greek */
	{ 913, "&Alpha;" }, { 914, "&Beta;" }, { 915, "&Gamma;" }, { 916, "&Delta;" },
	{ 917, "&Epsilon;" }, { 918, "&Zeta;" }, { 919, "&Eta;" }, { 920, "&Theta;" },
	{ 921, "&Iota;" }, { 922, "&Kappa;" }, { 923, "&Lambda;" }, { 924, "&Mu;" },
	{ 925, "&Nu;" }, { 926, "&Xi;" }, { 927, "&Omicron;" }, { 928, "&Pi;" },
	{ 929, "&Rho;" }, { 931, "&Sigma;" }, { 932, "&Tau;" }, { 933, "&Upsilon;" },
	{ 934, "&Phi;" }, { 935, "&Chi;" }, { 936, "&Psi;" }, { 937, "&Omega;" },
	{ 945, "&alpha;" }, { 946, "&beta;" }, { 947, "&gamma;" }, { 948, "&delta;" },
	{ 949, "&epsilon;" }, { 950, "&zeta;" }, { 951, "&eta;" }, { 952, "&theta;" },
	{ 953, "&iota;" }, { 954, "&kappa;" }, { 955, "&lambda;" }, { 956, "&mu;" },
	{ 957, "&nu;" }, { 958, "&xi;" }, { 959, "&omicron;" }, { 960, "&pi;" },
	{ 961, "&rho;" }, { 962, "&sigmaf;" }, { 963, "&sigma;" }, { 964, "&tau;" },
	{ 965, "&upsilon;" }, { 966, "&phi;" }, { 967, "&chi;" }, { 968, "&psi;" },
	{ 969, "&omega;" },
	{ 977, "&thetasym;" },	/* was missing the terminating ';' */
	{ 978, "&upsih;" }, { 982, "&piv;" },

	/* General punctuation */
	{ 8194, "&ensp;" }, { 8195, "&emsp;" }, { 8201, "&thinsp;" },
	{ 8204, "&zwnj;" }, { 8205, "&zwj;" }, { 8206, "&lrm;" }, { 8207, "&rlm;" },
	{ 8211, "&ndash;" }, { 8212, "&mdash;" },
	{ 8216, "&lsquo;" }, { 8217, "&rsquo;" }, { 8218, "&sbquo;" },
	{ 8220, "&ldquo;" }, { 8221, "&rdquo;" }, { 8222, "&bdquo;" },
	{ 8224, "&dagger;" }, { 8225, "&Dagger;" }, { 8226, "&bull;" },
	{ 8230, "&hellip;" }, { 8240, "&permil;" }, { 8242, "&prime;" }, { 8243, "&Prime;" },
	{ 8249, "&lsaquo;" }, { 8250, "&rsaquo;" }, { 8254, "&oline;" }, { 8260, "&frasl;" },
	{ 8364, "&euro;" },

	/* Letterlike symbols */
	{ 8465, "&image;" }, { 8472, "&weierp;" }, { 8476, "&real;" },
	{ 8482, "&trade;" }, { 8501, "&alefsym;" },

	/* Arrows */
	{ 8592, "&larr;" }, { 8593, "&uarr;" }, { 8594, "&rarr;" }, { 8595, "&darr;" },
	{ 8596, "&harr;" }, { 8629, "&crarr;" },
	{ 8656, "&lArr;" }, { 8657, "&uArr;" }, { 8658, "&rArr;" }, { 8659, "&dArr;" },
	{ 8660, "&hArr;" },

	/* Mathematical operators */
	{ 8704, "&forall;" }, { 8706, "&part;" }, { 8707, "&exist;" }, { 8709, "&empty;" },
	{ 8711, "&nabla;" }, { 8712, "&isin;" }, { 8713, "&notin;" }, { 8715, "&ni;" },
	{ 8719, "&prod;" }, { 8721, "&sum;" }, { 8722, "&minus;" }, { 8727, "&lowast;" },
	{ 8730, "&radic;" }, { 8733, "&prop;" }, { 8734, "&infin;" }, { 8736, "&ang;" },
	{ 8743, "&and;" }, { 8744, "&or;" }, { 8745, "&cap;" }, { 8746, "&cup;" },
	{ 8747, "&int;" }, { 8756, "&there4;" }, { 8764, "&sim;" }, { 8773, "&cong;" },
	{ 8776, "&asymp;" }, { 8800, "&ne;" }, { 8801, "&equiv;" }, { 8804, "&le;" },
	{ 8805, "&ge;" }, { 8834, "&sub;" }, { 8835, "&sup;" }, { 8836, "&nsub;" },
	{ 8838, "&sube;" }, { 8839, "&supe;" }, { 8853, "&oplus;" }, { 8855, "&otimes;" },
	{ 8869, "&perp;" }, { 8901, "&sdot;" },

	/* Miscellaneous technical, geometric shapes, card suits */
	{ 8968, "&lceil;" }, { 8969, "&rceil;" }, { 8970, "&lfloor;" }, { 8971, "&rfloor;" },
	{ 9001, "&lang;" }, { 9002, "&rang;" }, { 9674, "&loz;" },
	{ 9824, "&spades;" }, { 9827, "&clubs;" }, { 9829, "&hearts;" }, { 9830, "&diams;" }
};

/*
 * Print every table id above 255 whose high or low byte is zero.
 * For ids above 255 the high byte can never be zero, so in practice this
 * reports ids whose low byte is zero.
 */
static void test_map2(void)
{
	const size_t count = ARRAY_SIZE(html_charset);
	size_t idx;

	for (idx = 0; idx < count; idx++) {
		const unsigned short code = html_charset[idx].id;
		const unsigned char hi = (unsigned char) (code >> 8);
		const unsigned char lo = (unsigned char) (code & 0xFF);

		if (code > 255 && (hi == 0 || lo == 0))
			printf("id: %d = 0\n", html_charset[idx].id);
	}
}

/*
 * Clear earlier entries whose entity string is repeated by a later entry,
 * so that each string survives at most once. For every live entry, the
 * first earlier live entry with the same string is cleared.
 */
static void drop_duplicate_entities(void)
{
	const size_t count = ARRAY_SIZE(html_charset);
	size_t cur, prev;

	for (cur = 0; cur < count; cur++) {
		const char *name = html_charset[cur].entity;

		if (name == NULL)
			continue;

		for (prev = 0; prev < cur; prev++) {
			if (html_charset[prev].entity == NULL)
				continue;
			if (strcmp(name, html_charset[prev].entity) == 0) {
				html_charset[prev].entity = NULL;
				break;
			}
		}
	}
}

/*
 * Return the entity of the first live table entry whose id equals code,
 * or NULL when the code point has no entity.
 */
static const char *find_entity(unsigned int code)
{
	const size_t count = ARRAY_SIZE(html_charset);
	size_t idx;

	for (idx = 0; idx < count; idx++) {
		if (html_charset[idx].entity != NULL && html_charset[idx].id == code)
			return html_charset[idx].entity;
	}
	return NULL;
}

/*
 * Write a C header to stdout containing:
 *   html_charmap[]  entity string (or NULL) for each code point, eight per row
 *   html_tab[]      the surviving { code, entity, length } triples
 *
 * Side effect: duplicate entity strings are removed from html_charset first.
 */
static void create_map2(void)
{
	const size_t count = ARRAY_SIZE(html_charset);
	unsigned int code;
	size_t idx;

	drop_duplicate_entities();

	printf("#ifndef __HTML_CHARSET_INCLUDE_H__\n");
	printf("#define __HTML_CHARSET_INCLUDE_H__\n");
	printf("\n");
	printf("static const char *html_charmap[] = {\n");

	/*
	 * Emit a slot for every unsigned short value, 0xFFFF included, so the
	 * generated array can be indexed by any 16-bit code without overrun.
	 */
	for (code = 0; code < CHARMAP_SLOTS; code++) {
		const char *name = find_entity(code);

		if (code == 0)
			printf("\t");
		else if (code % 8 == 0)
			printf(",\n\t");
		else
			printf(", ");

		if (name != NULL)
			printf("\"%s\"", name);
		else
			printf("NULL");
	}
	printf("\n};\n");

	printf("\ntypedef struct {\n"
		"\tunsigned short ch;\n"
		"\tconst char *txt;\n"
		"\tsize_t len;\n"
		"} HTML_SPEC;\n"
		"\n"
		"static const HTML_SPEC html_tab[] = {\n"
	);

	for (idx = 0; idx < count; idx++) {
		const char *name = html_charset[idx].entity;

		if (name == NULL)
			continue;
		printf("\t{ %u, \"%s\", sizeof(\"%s\") - 1 },\n",
			(unsigned int) html_charset[idx].id, name, name);
	}

	printf("};\n\n");
	printf("#endif\n");
}

/*----------------------------------------------------------------------------*/

typedef struct HTML_SPEC_TBL {
	const char ch;
	const char *txt;
} HTML_SPEC_TBL;

/*
 * Markup characters and their replacements; terminated by a { 0, 0 } entry.
 * NOTE(review): the apostrophe is written as "&#39;" because HTML 4.01 has
 * no &apos; entity -- confirm this matches the upstream table.
 */
static const HTML_SPEC_TBL spec_tab[] = {
	{ '<', "&lt;" },
	{ '>', "&gt;" },
	{ '&', "&amp;" },
	{ '\'', "&#39;" },
	{ '\"', "&quot;" },
	{ 0, 0 }
};

/*
 * Write a comma-separated initializer list to stdout, eight per row with
 * CRLF line ends: the replacement string for each byte value that appears
 * in spec_tab, NULL for all others.
 */
static void create_map1(void)
{
	int code, k;

	/* One slot per byte value, 0xFF included. */
	for (code = 0; code < SPECMAP_SLOTS; code++) {
		if (code > 0) {
			if (code % 8 == 0)
				printf(",\r\n");
			else
				printf(", ");
		}

		for (k = 0; spec_tab[k].ch; k++) {
			if (spec_tab[k].ch == code)
				break;
		}

		if (spec_tab[k].ch)
			printf("\"%s\"", spec_tab[k].txt);
		else
			printf("NULL");
	}
	printf("\r\n");
}

/*
 * Decode a fixed percent-encoded sample with acl_url_decode() and print
 * both the input and the result.
 */
static void test_url_decode(void)
{
	const char *src = "%E4%B8%AD%E5%9B%BD%0A%E4%B8%AD%E5%9B%BD%0A%E4%B8%AD%E5%9B%BD";
	char *dst;

	printf("src(%s)\n", src);
	dst = acl_url_decode(src);
	/* Passing NULL to %s is undefined behavior, so guard the result. */
	printf("dst(%s)\n", dst != NULL ? dst : "(null)");
	/*
	 * NOTE(review): dst is presumably allocated by acl_url_decode() and is
	 * never released here -- confirm ownership and free it if required.
	 */
}

/*
 * Entry point. The optional first argument selects the mode; with no
 * argument the html_charmap header is generated (the historical default).
 * Returns 0 on success, 1 for an unknown mode.
 */
int main(int argc, char *argv[])
{
	const char *mode = argc > 1 ? argv[1] : "map2";

	if (strcmp(mode, "map1") == 0)
		create_map1();
	else if (strcmp(mode, "map2") == 0)
		create_map2();
	else if (strcmp(mode, "test") == 0)
		test_map2();
	else if (strcmp(mode, "url") == 0)
		test_url_decode();
	else {
		fprintf(stderr, "usage: %s [map1|map2|test|url]\n", argv[0]);
		return (1);
	}

	return (0);
}