add http_url class for parsing url

This commit is contained in:
shuxin   zheng 2021-06-08 16:45:49 +08:00
parent 1a48b07d9f
commit 40ddd8aee5
3 changed files with 204 additions and 6 deletions

View File

@ -1,5 +1,6 @@
#pragma once
#include "../acl_cpp_define.hpp"
#include "../stdlib/string.hpp"
namespace acl {
@ -28,6 +29,62 @@ public:
*/
static bool get_addr(const char* url, char* domain, size_t size,
unsigned short* port);
/**
 * Split a URL into its components.
 * NOTE(review): the original non-English descriptions in this comment were
 * lost; the notes below are inferred from the parameter names/types and the
 * one surviving example, and should be confirmed against the implementation.
 * The previous comment also listed a `size` parameter, which this function
 * does not take.
 * @param url {const char*} the URL to be parsed
 * @param domain {string&} presumably receives the domain (host) part of url
 * @param port {unsigned short*} presumably receives the port taken from url
 * @param url_path {string&} receives the path part of url, e.g. for the
 *  URL http://test.com.cn/cgi-bin/test?name=value only the part
 *  /cgi-bin/test is extracted
 * @param url_params {string&} presumably receives the query-string part of
 *  url (the text after '?')
 * @return {bool} presumably whether url could be parsed -- TODO confirm
 */
static bool parse_url(const char* url, string& domain,
unsigned short* port, string& url_path, string& url_params);
};
/**
 * Holds the pieces of a URL (protocol, domain, port, path, query parameters)
 * filled in by parse().
 */
class ACL_CPP_API http_url {
public:
/**
 * Construct with the defaults: protocol "http" and port 80.
 */
http_url(void);
~http_url(void) {}
/**
 * Parse url and store its pieces in this object.
 * @param url {const char*} a URL starting with "http://", "https://"
 *  (matched case-insensitively) or '/'
 * @return {bool} false when url starts with none of the accepted
 *  prefixes or when nothing follows the scheme prefix
 */
bool parse(const char* url);
public:
/**
 * @return {const char*} the protocol name: "http" by default, "https"
 *  after an https:// URL was parsed
 */
const char* get_proto(void) const {
return proto_;
}
/**
 * @return {const char*} the domain stored by the last parse; empty when
 *  none was stored
 */
const char* get_domain(void) const {
return domain_.c_str();
}
/**
 * @return {unsigned short} the port: 80 by default, 443 after an
 *  https:// URL, or the port number parsed from the URL
 */
unsigned short get_port(void) const {
return port_;
}
/**
 * @return {const char*} the path part of the URL (the text before '?')
 */
const char* get_url_path(void) const {
return url_path_.c_str();
}
/**
 * @return {const char*} the text after '?' in the URL; empty when none
 *  was stored
 */
const char* get_url_params(void) const {
return url_params_.c_str();
}
/**
 * Restore the defaults (protocol "http", port 80) and clear the domain,
 * path and parameters.
 */
void reset(void);
private:
// protocol name as a NUL terminated C string, "http" or "https"
char proto_[16];
string domain_;
unsigned short port_;
string url_path_;
string url_params_;
// Parse the part of the URL that begins with '/': path and, after '?',
// the parameters; returns false if url does not begin with '/'.
bool parse_url_part(const char* url);
// Parse the text following the scheme prefix and store the domain (and
// port); returns a pointer to the first '/' in url, or NULL when url
// contains no '/' or begins with one.
const char* parse_domain(const char* url);
};
} // namespace acl

View File

@ -4,6 +4,7 @@
#include "stdafx.h"
#include "acl_cpp/stdlib/string.hpp"
#include "acl_cpp/stdlib/url_coder.hpp"
#include "acl_cpp/http/http_utils.hpp"
using namespace acl;
@ -75,11 +76,45 @@ int main(void)
v0, found0 ? "found it" : "not found", coder5.get("n1"),
v3, found3 ? "found it" : "not found");
#ifdef WIN32
printf("enter any key to exit ...\r\n");
printf("enter any key to continue ...\r\n");
getchar();
printf("\r\n");
acl::http_url hu;
const char* urls[] = {
"http://www.google.com/",
"https://www.google.com/",
"https://www.google.com/test",
"http://www.google.com/test?name=value&name2=value2",
"/test",
"/",
"/test?",
"/test?name1=value1&name2=value2",
"/path/test",
"/path/test?name=value",
NULL,
};
for (size_t i = 0; urls[i] != NULL; i++) {
if (!hu.parse(urls[i])) {
printf("parse url=%s failed\r\n", urls[i]);
break;
}
printf("url:%s\r\n", urls[i]);
printf("proto=%s, port=%d, domain=%s, path=%s, params=%s\r\n",
hu.get_proto(), hu.get_port(), hu.get_domain(),
hu.get_url_path(), hu.get_url_params());
printf("\r\n");
hu.reset();
}
#if defined(_WIN32) || defined(_WIN64)
printf("Enter any key to exit ...\r\n");
getchar();
#endif
return 0;
}

View File

@ -19,13 +19,12 @@ bool http_utils::get_addr(const char* url, char* addr, size_t size)
safe_snprintf(addr, size, "%s:%d", buf, port);
return true;
}
#define HTTP_PREFIX "http://"
#define HTTPS_PREFIX "https://"
bool http_utils::get_addr(const char* url, char* domain, size_t size,
unsigned short* pport)
{
#define HTTP_PREFIX "http://"
#define HTTPS_PREFIX "https://"
const char* ptr;
unsigned short default_port;
@ -73,4 +72,111 @@ bool http_utils::get_addr(const char* url, char* domain, size_t size,
return true;
}
//////////////////////////////////////////////////////////////////////////////
/**
 * Start out as a plain "http" URL on the default port 80; the string
 * members are default-constructed.
 */
http_url::http_url(void)
: port_(80)
{
	ACL_SAFE_STRNCPY(proto_, "http", sizeof(proto_));
}
/**
 * Put the object back into its freshly constructed state so that it can be
 * reused for another parse().
 */
void http_url::reset(void) {
	// forget everything taken from the previous URL
	url_params_.clear();
	url_path_.clear();
	domain_.clear();

	// back to the defaults: protocol "http", port 80
	port_ = 80;
	ACL_SAFE_STRNCPY(proto_, "http", sizeof(proto_));
}
bool http_url::parse(const char *url) {
const char* ptr;
if (!strncasecmp(url, HTTP_PREFIX, sizeof(HTTP_PREFIX) - 1)) {
ptr = url + sizeof(HTTP_PREFIX) - 1;
} else if (!strncasecmp(url, HTTPS_PREFIX, sizeof(HTTPS_PREFIX) - 1)) {
ptr = url + sizeof(HTTPS_PREFIX) - 1;
port_ = 443;
ACL_SAFE_STRNCPY(proto_, "https", sizeof(proto_));
} else if (*url == '/'){
ptr = url;
} else {
logger_error("invalid url: %s", url);
return false;
}
if (*ptr == 0) {
logger_error("invalid url: %s", url);
return false;
}
if (ptr == url) {
return parse_url_part(url);
} else {
ptr = parse_domain(ptr);
if (ptr == NULL) {
url_path_ = "/";
return true;
}
return parse_url_part(ptr);
}
}
#define SKIP_WHILE(cond, ptr) { while(*ptr && (cond)) ptr++; }
/**
 * Parse the part of a URL that starts at the path: everything before the
 * first '?' is stored as the path, everything after it as the parameters.
 * @param url {const char*} text that must begin with '/'
 * @return {bool} false if url does not begin with '/'
 */
bool http_url::parse_url_part(const char *url) {
	if (*url != '/') {
		logger_error("invalid url: %s", url);
		return false;
	}

	// step over the leading run of slashes
	const char* cursor = url;
	while (*cursor == '/') {
		++cursor;
	}

	// nothing but slashes: this is the root path
	if (*cursor == 0) {
		url_path_ = "/";
		return true;
	}

	const char* query = strchr(cursor, '?');
	if (query != NULL) {
		// the path is everything from the first slash up to the '?'
		url_path_.copy(url, query - url);

		// keep the parameters only if something follows the '?'
		if (query[1] != 0) {
			url_params_ = query + 1;
		}
		return true;
	}

	// no '?' at all: the whole text is the path
	url_path_ = url;
	return true;
}
const char* http_url::parse_domain(const char *url) {
if (*url == '/') {
logger_error("invalid url: %s", url);
return NULL;
}
const char* ptr = strchr(url, '/');
if (ptr == NULL) {
domain_ = url;
return NULL;
}
char buf[256];
size_t size = ptr - url + 1;
if (size > sizeof(buf)) { // xxx: sanity check
size = sizeof(buf);
}
ACL_SAFE_STRNCPY(buf, url, size);
// fixme: Is it error if buf contains IPV6 Addr ---zsx
char* col = strchr(buf, ':');
if (col != NULL) {
*col++ = 0;
port_ = (unsigned short) atoi(col);
if (port_ == 0 || port_ == 65535) {
port_ = 80;
}
}
domain_ = buf;
return ptr;
}
} // namespace acl