diff --git a/lib_acl_cpp/include/acl_cpp/http/http_utils.hpp b/lib_acl_cpp/include/acl_cpp/http/http_utils.hpp index 90e0f64bb..1041f5d05 100644 --- a/lib_acl_cpp/include/acl_cpp/http/http_utils.hpp +++ b/lib_acl_cpp/include/acl_cpp/http/http_utils.hpp @@ -1,5 +1,6 @@ #pragma once #include "../acl_cpp_define.hpp" +#include "../stdlib/string.hpp" namespace acl { @@ -28,6 +29,62 @@ public: */ static bool get_addr(const char* url, char* domain, size_t size, unsigned short* port); + + /** + * Parse a complete or partial URL. + * @param url {const char*} non-NULL complete or partial URL string + * @param domain {string&} receives the domain name + * @param port {unsigned short*} receives the port number found in the url + * @param url_path {string&} receives the part of the url that excludes the + * domain and the parameters; e.g. for the URL + * http://test.com.cn/cgi-bin/test?name=value only /cgi-bin/test is + * extracted + * @param url_params {string&} receives the parameter part of the url + * @return {bool} whether the url was parsed successfully + */ + static bool parse_url(const char* url, string& domain, + unsigned short* port, string& url_path, string& url_params); +}; + +/** Holds the parts of one URL: protocol, domain, port, path and parameters. parse() accepts URLs starting with "http://", "https://" or '/'. */ class ACL_CPP_API http_url { +public: + /** Start with protocol "http" and port 80. */ http_url(void); + ~http_url(void) {} + + /** Parse url and store its parts in this object. @param url {const char*} URL starting with "http://", "https://" or '/' @return {bool} false when url has none of these prefixes or when nothing follows the scheme prefix */ bool parse(const char* url); + +public: + /** @return {const char*} protocol name, "http" or "https" */ const char* get_proto(void) const { + return proto_; + } + + /** @return {const char*} domain stored by parse(); empty when none was stored */ const char* get_domain(void) const { + return domain_.c_str(); + } + + /** @return {unsigned short} port; starts at 80, parse() sets 443 for "https://" URLs and a port written in the URL replaces it */ unsigned short get_port(void) const { + return port_; + } + + /** @return {const char*} path part of the URL, without the parameters */ const char* get_url_path(void) const { + return url_path_.c_str(); + } + + /** @return {const char*} text following the first '?' of the URL; empty when there is none */ const char* get_url_params(void) const { + return url_params_.c_str(); + } + + /** Restore protocol "http" and port 80 and clear domain, path and parameters. */ void reset(void); + +private: + char proto_[16]; + string domain_; + unsigned short port_; + string url_path_; + string url_params_; + + /** Split a string starting with '/' into url_path_ and url_params_. */ bool parse_url_part(const char* url); + /** Extract domain_ (and port_, if present) from the text following the scheme prefix. */ const char* parse_domain(const char* url); }; } // namespace acl diff --git a/lib_acl_cpp/samples/url_coder/url_coder.cpp b/lib_acl_cpp/samples/url_coder/url_coder.cpp index 7359c7741..2b576446d 100644 --- a/lib_acl_cpp/samples/url_coder/url_coder.cpp +++ b/lib_acl_cpp/samples/url_coder/url_coder.cpp @@ -4,6 +4,7 @@ 
#include "stdafx.h" #include "acl_cpp/stdlib/string.hpp" #include "acl_cpp/stdlib/url_coder.hpp" +#include "acl_cpp/http/http_utils.hpp" using namespace acl; @@ -75,11 +76,45 @@ int main(void) v0, found0 ? "found it" : "not found", coder5.get("n1"), v3, found3 ? "found it" : "not found"); -#ifdef WIN32 - printf("enter any key to exit ...\r\n"); + printf("enter any key to continue ...\r\n"); + getchar(); + + printf("\r\n"); + + acl::http_url hu; + const char* urls[] = { + "http://www.google.com/", + "https://www.google.com/", + "https://www.google.com/test", + "http://www.google.com/test?name=value&name2=value2", + "/test", + "/", + "/test?", + "/test?name1=value1&name2=value2", + "/path/test", + "/path/test?name=value", + NULL, + }; + + for (size_t i = 0; urls[i] != NULL; i++) { + if (!hu.parse(urls[i])) { + printf("parse url=%s failed\r\n", urls[i]); + break; + } + + printf("url:%s\r\n", urls[i]); + printf("proto=%s, port=%d, domain=%s, path=%s, params=%s\r\n", + hu.get_proto(), hu.get_port(), hu.get_domain(), + hu.get_url_path(), hu.get_url_params()); + printf("\r\n"); + + hu.reset(); + } + +#if defined(_WIN32) || defined(_WIN64) + printf("Enter any key to exit ...\r\n"); getchar(); #endif - return 0; } diff --git a/lib_acl_cpp/src/http/http_utils.cpp b/lib_acl_cpp/src/http/http_utils.cpp index 07ac66061..e29bcacaa 100644 --- a/lib_acl_cpp/src/http/http_utils.cpp +++ b/lib_acl_cpp/src/http/http_utils.cpp @@ -19,13 +19,12 @@ bool http_utils::get_addr(const char* url, char* addr, size_t size) safe_snprintf(addr, size, "%s:%d", buf, port); return true; } +#define HTTP_PREFIX "http://" +#define HTTPS_PREFIX "https://" bool http_utils::get_addr(const char* url, char* domain, size_t size, unsigned short* pport) { -#define HTTP_PREFIX "http://" -#define HTTPS_PREFIX "https://" - const char* ptr; unsigned short default_port; @@ -73,4 +72,111 @@ bool http_utils::get_addr(const char* url, char* domain, size_t size, return true; } 
+////////////////////////////////////////////////////////////////////////////// + +http_url::http_url(void) { + ACL_SAFE_STRNCPY(proto_, "http", sizeof(proto_)); + port_ = 80; +} + +void http_url::reset(void) { + ACL_SAFE_STRNCPY(proto_, "http", sizeof(proto_)); + port_ = 80; + domain_.clear(); + url_path_.clear(); + url_params_.clear(); +} + +bool http_url::parse(const char *url) { + const char* ptr; + + if (!strncasecmp(url, HTTP_PREFIX, sizeof(HTTP_PREFIX) - 1)) { + ptr = url + sizeof(HTTP_PREFIX) - 1; + } else if (!strncasecmp(url, HTTPS_PREFIX, sizeof(HTTPS_PREFIX) - 1)) { + ptr = url + sizeof(HTTPS_PREFIX) - 1; + port_ = 443; + ACL_SAFE_STRNCPY(proto_, "https", sizeof(proto_)); + } else if (*url == '/'){ + ptr = url; + } else { + logger_error("invalid url: %s", url); + return false; + } + + if (*ptr == 0) { + logger_error("invalid url: %s", url); + return false; + } + + if (ptr == url) { + return parse_url_part(url); + } else { + ptr = parse_domain(ptr); + if (ptr == NULL) { + url_path_ = "/"; + return true; + } + return parse_url_part(ptr); + } +} + +#define SKIP_WHILE(cond, ptr) { while(*ptr && (cond)) ptr++; } + +bool http_url::parse_url_part(const char *url) { + if (*url != '/') { + logger_error("invalid url: %s", url); + return false; + } + + const char* ptr = url; + SKIP_WHILE(*ptr == '/', ptr); + if (*ptr == 0) { + url_path_ = "/"; + return true; + } + + const char* qm = strchr(ptr, '?'); + if (qm == NULL) { + url_path_ = url; + return true; + } + url_path_.copy(url, qm - url); + ++qm; + if (*qm != 0) { + url_params_ = qm; + } + return true; +} + +const char* http_url::parse_domain(const char *url) { + if (*url == '/') { + logger_error("invalid url: %s", url); + return NULL; + } + const char* ptr = strchr(url, '/'); + if (ptr == NULL) { + domain_ = url; + return NULL; + } + + char buf[256]; + size_t size = ptr - url + 1; + if (size > sizeof(buf)) { // xxx: sanity check + size = sizeof(buf); + } + ACL_SAFE_STRNCPY(buf, url, size); + + // fixme: Is it 
error if buf contains IPV6 Addr ---zsx + char* col = strchr(buf, ':'); + if (col != NULL) { + *col++ = 0; + port_ = (unsigned short) atoi(col); + if (port_ == 0 || port_ == 65535) { + port_ = 80; + } + } + domain_ = buf; + return ptr; +} + } // namespace acl