Added generic URL parsing class
This commit is contained in:
parent
6d33ba2cbd
commit
e57cf42352
2 changed files with 87 additions and 1 deletions
|
@ -4,6 +4,82 @@
|
||||||
#include "http_parser.h"
|
#include "http_parser.h"
|
||||||
#include "encode.h"
|
#include "encode.h"
|
||||||
#include "timing.h"
|
#include "timing.h"
|
||||||
|
#include "defines.h"
|
||||||
|
|
||||||
|
/// Helper function to check if the given c-string is numeric or not
|
||||||
|
static bool is_numeric(const char * str){
|
||||||
|
while (str != 0){
|
||||||
|
if (str[0] < 48 || str[0] > 57){return false;}
|
||||||
|
++str;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
///Constructor that does the actual parsing
|
||||||
|
HTTP::URL::URL(const std::string & url){
|
||||||
|
//first detect protocol at the start, if any
|
||||||
|
size_t proto_sep = url.find("://");
|
||||||
|
if (proto_sep != std::string::npos){
|
||||||
|
protocol = url.substr(0, proto_sep);
|
||||||
|
proto_sep += 3;
|
||||||
|
}else{
|
||||||
|
proto_sep = 0;
|
||||||
|
}
|
||||||
|
//proto_sep now points to the start of the host, guaranteed
|
||||||
|
//continue by finding the path, if any
|
||||||
|
size_t first_slash = url.find('/', proto_sep);
|
||||||
|
if (first_slash != std::string::npos){
|
||||||
|
path = url.substr(first_slash+1);
|
||||||
|
}
|
||||||
|
//host and port are now definitely between proto_sep and first_slash
|
||||||
|
//we check for [ at the start because we may have an IPv6 address as host
|
||||||
|
if (url[proto_sep] == '['){
|
||||||
|
//IPv6 address - find matching brace
|
||||||
|
size_t closing_brace = url.find(']', proto_sep);
|
||||||
|
//check if it exists at all
|
||||||
|
if (closing_brace == std::string::npos || closing_brace > first_slash){
|
||||||
|
//assume host ends at first slash if there is no closing brace before it
|
||||||
|
closing_brace = first_slash;
|
||||||
|
}
|
||||||
|
host = url.substr(proto_sep+1, closing_brace-(proto_sep+1));
|
||||||
|
//continue by finding port, if any
|
||||||
|
size_t colon = url.rfind(':', first_slash);
|
||||||
|
if (colon == std::string::npos || colon <= closing_brace){
|
||||||
|
//no port. Assume 80
|
||||||
|
port = "80";
|
||||||
|
}else{
|
||||||
|
//we have a port number, read it
|
||||||
|
port = url.substr(colon+1, first_slash-(colon+1));
|
||||||
|
}
|
||||||
|
}else{
|
||||||
|
//"normal" host - first find port, if any
|
||||||
|
size_t colon = url.rfind(':', first_slash);
|
||||||
|
if (colon == std::string::npos || colon < proto_sep){
|
||||||
|
//no port. Assume 80
|
||||||
|
port = "80";
|
||||||
|
host = url.substr(proto_sep, first_slash-proto_sep);
|
||||||
|
}else{
|
||||||
|
//we have a port number, read it
|
||||||
|
port = url.substr(colon+1, first_slash-(colon+1));
|
||||||
|
host = url.substr(proto_sep, colon-proto_sep);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//if the host is numeric, assume it is a port, instead
|
||||||
|
if (is_numeric(host.c_str())){
|
||||||
|
port = host;
|
||||||
|
host = "";
|
||||||
|
}
|
||||||
|
EXTREME_MSG("URL host: %s", host.c_str());
|
||||||
|
EXTREME_MSG("URL protocol: %s", protocol.c_str());
|
||||||
|
EXTREME_MSG("URL port: %s", port.c_str());
|
||||||
|
EXTREME_MSG("URL path: %s", path.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
///Returns the port in numeric format
|
||||||
|
uint32_t HTTP::URL::getPort() const{
|
||||||
|
if (!port.size()){return 80;}
|
||||||
|
return atoi(port.c_str());
|
||||||
|
}
|
||||||
|
|
||||||
/// This constructor creates an empty HTTP::Parser, ready for use for either reading or writing.
|
/// This constructor creates an empty HTTP::Parser, ready for use for either reading or writing.
|
||||||
/// All this constructor does is call HTTP::Parser::Clean().
|
/// All this constructor does is call HTTP::Parser::Clean().
|
||||||
|
|
|
@ -68,6 +68,16 @@ namespace HTTP {
|
||||||
std::map<std::string, std::string> vars;
|
std::map<std::string, std::string> vars;
|
||||||
void Trim(std::string & s);
|
void Trim(std::string & s);
|
||||||
};
|
};
|
||||||
//HTTP::Parser class
|
|
||||||
|
///URL parsing class. Parses full URL into its subcomponents
|
||||||
|
class URL {
|
||||||
|
public:
|
||||||
|
URL(const std::string & url);
|
||||||
|
uint32_t getPort() const;
|
||||||
|
std::string host;///< Hostname or IP address of URL
|
||||||
|
std::string protocol;///<Protocol of URL
|
||||||
|
std::string port;///<Port of URL
|
||||||
|
std::string path;///<Path after the first slash, not inclusive
|
||||||
|
};
|
||||||
|
|
||||||
}//HTTP namespace
|
}//HTTP namespace
|
||||||
|
|
Loading…
Add table
Reference in a new issue