Split HTTP namespace up into http_parser.h and url.h, since the URL library can be useful stand-alone as well.

This commit is contained in:
Thulinma 2020-02-28 11:33:12 +01:00
parent e0dfc3a4db
commit 793f6f7809
7 changed files with 307 additions and 283 deletions

View file

@ -138,6 +138,7 @@ set(libHeaders
lib/ebml.h lib/ebml.h
lib/ebml_socketglue.h lib/ebml_socketglue.h
lib/websocket.h lib/websocket.h
lib/url.h
) )
######################################## ########################################
@ -181,6 +182,7 @@ add_library (mist
lib/ebml.cpp lib/ebml.cpp
lib/ebml_socketglue.cpp lib/ebml_socketglue.cpp
lib/websocket.cpp lib/websocket.cpp
lib/url.cpp
) )
if (NOT APPLE) if (NOT APPLE)
set (LIBRT -lrt) set (LIBRT -lrt)

View file

@ -1,4 +1,5 @@
#include "http_parser.h" #include "http_parser.h"
#include "url.h"
#include "socket.h" #include "socket.h"
namespace HTTP{ namespace HTTP{

View file

@ -7,268 +7,9 @@
#include "defines.h" #include "defines.h"
#include "encode.h" #include "encode.h"
#include "timing.h" #include "timing.h"
#include "url.h"
#include <iomanip> #include <iomanip>
/// Reports whether the given c-string consists solely of ASCII decimal digits.
/// An empty string has no offending characters and therefore counts as numeric.
static bool is_numeric(const char *str){
  for (const char *cur = str; *cur != '\0'; ++cur){
    const bool isDigit = (*cur >= '0' && *cur <= '9');
    if (!isDigit){return false;}
  }
  return true;
}
/// Constructor that does the actual parsing.
/// Splits the given URL string into protocol, user, pass, host, port, path, args and frag.
/// Input may start with "protocol://", with "//" (no protocol), or directly with the host.
/// The path is normalized ("/./", "//" and "/../" are collapsed) and then URL-decoded;
/// user, pass and frag are URL-decoded as well, while args is stored exactly as given.
/// \param url The URL string to parse.
HTTP::URL::URL(const std::string &url){
IPv6Addr = false;
// first detect protocol at the start, if any
size_t proto_sep = url.find("://");
if (proto_sep != std::string::npos){
protocol = url.substr(0, proto_sep);
proto_sep += 3;
}else{
proto_sep = 0;
// No protocol: skip a leading "//" if present, so the host starts right after it
if (url.substr(0, 2) == "//"){proto_sep = 2;}
}
// proto_sep now points to the start of the host, guaranteed
// continue by finding the path, if any
size_t first_slash = url.find_first_of("/?#", proto_sep);
if (first_slash != std::string::npos){
// Drop the leading slash from the path, but keep a leading '?' or '#' so the
// fragment/argument splitting below can still find it
if (url[first_slash] == '/'){
path = url.substr(first_slash + 1);
}else{
path = url.substr(first_slash);
}
// Split off the fragment first, so a '?' inside the fragment is not mistaken for arguments
size_t hmark = path.find('#');
if (hmark != std::string::npos){
frag = Encodings::URL::decode(path.substr(hmark + 1));
path.erase(hmark);
}
// Everything after the first '?' becomes the arguments; these are kept undecoded
size_t qmark = path.find('?');
if (qmark != std::string::npos){
args = path.substr(qmark + 1);
path.erase(qmark);
}
if (path.size()){
if (path[0] == '/'){path.erase(0, 1);}
// Collapse "/./" into "/"
size_t dots = path.find("/./");
while (dots != std::string::npos){
DONTEVEN_MSG("%s (/./ -> /)", path.c_str());
path.erase(dots, 2);
dots = path.find("/./");
}
// Collapse repeated slashes into a single slash
dots = path.find("//");
while (dots != std::string::npos){
DONTEVEN_MSG("%s (// -> /)", path.c_str());
path.erase(dots, 1);
dots = path.find("//");
}
// The collapsing above may have left a leading slash behind; remove it again
if (path[0] == '/'){path.erase(0, 1);}
// Resolve "/../" by removing it together with the path segment in front of it
dots = path.find("/../");
while (dots != std::string::npos){
// If dots is 0, dots - 1 wraps around to npos and rfind searches the whole string;
// the explicit dots == 0 check below handles that case
size_t prevslash = path.rfind('/', dots - 1);
if (prevslash == std::string::npos || dots == 0){
// No earlier segment boundary: drop everything up to and including the "/../"
path.erase(0, dots + 4);
}else{
// Erase from just after the previous slash through the end of the "/../"
path.erase(prevslash + 1, dots - prevslash + 3);
}
dots = path.find("/../");
}
// A leading "./" or "../" cannot be resolved any further, so it is simply dropped
if (path.substr(0, 2) == "./"){path.erase(0, 2);}
if (path.substr(0, 3) == "../"){path.erase(0, 3);}
// Decode only after normalization, so encoded slashes/dots do not affect the steps above
path = Encodings::URL::decode(path);
}
}
// user, pass, host and port are now definitely between proto_sep and first_slash
// (if first_slash is npos the length is huge and substr simply takes the rest of the string)
std::string uphp = url.substr(proto_sep, first_slash - proto_sep); // user+pass+host+port
// Check if we have a user/pass before the host
size_t at_sign = uphp.find('@');
if (at_sign != std::string::npos){
std::string creds = uphp.substr(0, at_sign);
uphp.erase(0, at_sign + 1);
// Split on the first colon only, so the password itself may contain colons
size_t colon = creds.find(':');
if (colon != std::string::npos){
user = Encodings::URL::decode(creds.substr(0, colon));
pass = Encodings::URL::decode(creds.substr(colon + 1));
}else{
user = Encodings::URL::decode(creds);
}
}
// we check for [ at the start because we may have an IPv6 address as host
if (uphp[0] == '['){
// IPv6 address - find matching brace
IPv6Addr = true;
size_t closing_brace = uphp.find(']');
// If there is no closing brace, closing_brace is npos: substr then takes the rest of the
// string as host and the port search below finds nothing
host = uphp.substr(1, closing_brace - 1);
// continue by finding port, if any
size_t colon = uphp.find(':', closing_brace);
if (colon == std::string::npos){
// no port. Assume default
port = "";
}else{
// we have a port number, read it
port = uphp.substr(colon + 1);
// Text after the colon that is not a number is not a port: append it to the host instead
if (!is_numeric(port.c_str())){
host += ":" + port;
port = "";
}
}
}else{
//"normal" host - first find port, if any
size_t colon = uphp.rfind(':');
if (colon == std::string::npos){
// no port. Assume default
port = "";
host = uphp;
}else{
// we have a port number, read it
port = uphp.substr(colon + 1);
host = uphp.substr(0, colon);
// A non-numeric tail after the last colon means this was not a port after all:
// treat the whole thing as an IPv6 address written without brackets
if (!is_numeric(port.c_str())){
IPv6Addr = true;
host += ":" + port;
port = "";
}
}
}
// if the host is numeric, assume it is a port, instead
if (host.size() && is_numeric(host.c_str())){
port = host;
host = "";
}
EXTREME_MSG("URL: %s", getUrl().c_str());
}
/// Returns the port as a number.
/// When no explicit port is set, the default port for the protocol is returned instead.
uint32_t HTTP::URL::getPort() const{
  if (port.empty()){return getDefaultPort();}
  const int parsedPort = atoi(port.c_str());
  return parsedPort;
}
/// Looks up the default port number belonging to the protocol of this URL.
/// Returns 0 when the protocol is empty or not one of the known protocols.
uint32_t HTTP::URL::getDefaultPort() const{
  static const struct{
    const char *name;
    uint32_t number;
  }knownPorts[] ={{"http", 80},   {"https", 443}, {"rtmp", 1935},
                   {"rtmps", 443}, {"dtsc", 4200}, {"rtsp", 554}};
  const size_t knownCount = sizeof(knownPorts) / sizeof(knownPorts[0]);
  for (size_t i = 0; i < knownCount; ++i){
    if (protocol == knownPorts[i].name){return knownPorts[i].number;}
  }
  return 0;
}
/// Returns the file extension of the URL, or an empty string if none.
/// The extension is everything after the last '.' in the final path segment.
/// A '.' that belongs to a directory name (for example "dir.v2/file") is not treated
/// as an extension separator, so such paths yield an empty string.
/// \return The extension without the leading dot, or "" when the last segment has no dot.
std::string HTTP::URL::getExt() const{
  const size_t lastDot = path.rfind('.');
  if (lastDot == std::string::npos){return "";}
  // Only accept the dot if it lies inside the last path segment
  const size_t lastSlash = path.rfind('/');
  if (lastSlash != std::string::npos && lastSlash > lastDot){return "";}
  return path.substr(lastDot + 1);
}
/// Serializes all components back into a complete URL string:
/// protocol, credentials, host, port (only when set and not the default), path, args and frag.
std::string HTTP::URL::getUrl() const{
  std::string out = protocol.size() ? protocol + "://" : std::string("//");
  const bool hasCredentials = user.size() || pass.size();
  if (hasCredentials){
    out += Encodings::URL::encode(user);
    out += ":";
    out += Encodings::URL::encode(pass);
    out += "@";
  }
  // IPv6 hosts are wrapped in brackets so the colons are not mistaken for a port separator
  if (IPv6Addr){
    out += "[";
    out += host;
    out += "]";
  }else{
    out += host;
  }
  if (port.size()){
    if (getPort() != getDefaultPort()){
      out += ":";
      out += port;
    }
  }
  out += "/";
  if (!path.empty()){out += Encodings::URL::encode(path, "/:=@[]");}
  if (!args.empty()){
    out += "?";
    out += args;
  }
  if (!frag.empty()){
    out += "#";
    out += Encodings::URL::encode(frag, "/:=@[]#?&");
  }
  return out;
}
/// Returns the path with a leading slash prepended, for use when this is a local file URI.
std::string HTTP::URL::getFilePath() const{
  std::string filePath("/");
  filePath += path;
  return filePath;
}
/// Serializes the URL without credentials and without fragment:
/// protocol, host, port (only when set and not the default), path and args.
std::string HTTP::URL::getProxyUrl() const{
  std::string out;
  if (protocol.empty()){
    out = "//";
  }else{
    out = protocol + "://";
  }
  // IPv6 hosts are wrapped in brackets so the colons are not mistaken for a port separator
  if (IPv6Addr){
    out += "[";
    out += host;
    out += "]";
  }else{
    out += host;
  }
  if (port.size()){
    if (getPort() != getDefaultPort()){
      out += ":";
      out += port;
    }
  }
  out += "/";
  if (!path.empty()){out += Encodings::URL::encode(path, "/:=@[]");}
  if (!args.empty()){
    out += "?";
    out += args;
  }
  return out;
}
/// Serializes the URL without args and without fragment:
/// protocol, credentials, host, port (only when set and not the default) and path.
std::string HTTP::URL::getBareUrl() const{
  std::string out = protocol.size() ? protocol + "://" : std::string("//");
  const bool hasCredentials = user.size() || pass.size();
  if (hasCredentials){
    out += Encodings::URL::encode(user);
    out += ":";
    out += Encodings::URL::encode(pass);
    out += "@";
  }
  // IPv6 hosts are wrapped in brackets so the colons are not mistaken for a port separator
  if (IPv6Addr){
    out += "[";
    out += host;
    out += "]";
  }else{
    out += host;
  }
  if (port.size()){
    if (getPort() != getDefaultPort()){
      out += ":";
      out += port;
    }
  }
  out += "/";
  if (!path.empty()){out += Encodings::URL::encode(path, "/:=@[]");}
  return out;
}
/// Returns a URL object for the given link, resolved relative to the current URL object.
/// Three kinds of link are recognized:
///  - Full links ("proto://..."), which are parsed on their own.
///  - Absolute links: "//host/..." reuses only this URL's protocol, while "/path" reuses
///    this URL's protocol, credentials, host and port.
///  - Anything else is treated as relative to the directory of this URL's path.
/// \param l The link to resolve.
/// \return The resolved URL.
HTTP::URL HTTP::URL::link(const std::string &l) const{
  // Full link: a "://" that appears before any other slash.
  // The slash check used to read l.find('/' != std::string::npos), which searched for
  // the character '\x01' because of a misplaced parenthesis.
  const size_t firstSlash = l.find('/');
  if (firstSlash != std::string::npos && l.find("://") < firstSlash){
    DONTEVEN_MSG("Full link: %s", l.c_str());
    return URL(l);
  }
  // Absolute link
  if (l[0] == '/'){
    DONTEVEN_MSG("Absolute link: %s", l.c_str());
    if (l.size() > 1 && l[1] == '/'){
      // Same-protocol full link
      return URL(protocol + ":" + l);
    }else{
      // Same-domain/port absolute link
      URL tmp = *this;
      tmp.args.clear();
      // The fragment belongs to the base document, not to the link target; the relative
      // branch below drops it too (getBareUrl() omits it), so drop it here as well.
      tmp.frag.clear();
      tmp.path = l.substr(1);
      // Abuse the fact that we don't check for arguments in getUrl()
      return URL(tmp.getUrl());
    }
  }
  // Relative link: replace everything after the last slash of the bare URL with the link
  std::string tmpUrl = getBareUrl();
  size_t slashPos = tmpUrl.rfind('/');
  if (slashPos == std::string::npos){
    tmpUrl += "/";
  }else{
    tmpUrl.erase(slashPos + 1);
  }
  DONTEVEN_MSG("Relative link: %s+%s", tmpUrl.c_str(), l.c_str());
  return URL(tmpUrl + l);
}
/// This constructor creates an empty HTTP::Parser, ready for use for either reading or writing. /// This constructor creates an empty HTTP::Parser, ready for use for either reading or writing.
/// All this constructor does is call HTTP::Parser::Clean(). /// All this constructor does is call HTTP::Parser::Clean().
HTTP::Parser::Parser(){ HTTP::Parser::Parser(){

View file

@ -73,28 +73,5 @@ namespace HTTP{
void Trim(std::string &s); void Trim(std::string &s);
}; };
/// URL parsing class. Parses full URL into its subcomponents
/// The parsed components are public members and can be read or changed directly;
/// the get*Url() methods build a URL string from their current values.
class URL{
public:
/// Parses the given URL string into the public members below.
URL(const std::string &url = "");
/// Returns the port as a number, or the protocol's default port if no port is set.
uint32_t getPort() const;
/// Returns the default port for the protocol, or 0 if the protocol is not a known one.
uint32_t getDefaultPort() const;
/// Returns the file extension of the path, or an empty string if none.
std::string getExt() const;
/// Returns the full URL in string format.
std::string getUrl() const;
/// Returns the path with a leading slash, for use as a local file path.
std::string getFilePath() const;
/// Returns the URL in string format without args and frag.
std::string getBareUrl() const;
/// Returns the URL in string format without auth and frag.
std::string getProxyUrl() const;
std::string host; ///< Hostname or IP address of URL
std::string protocol; ///< Protocol of URL
std::string port; ///< Port of URL
std::string path; ///< Path after the first slash (not inclusive) but before any question mark
std::string args; ///< Everything after the question mark in the path, if it was present
std::string frag; ///< Everything after the # in the path, if it was present
std::string user; ///< Username, if it was present
std::string pass; ///< Password, if it was present
/// Returns a URL object for the given link, resolved relative to this URL.
URL link(const std::string &l) const;
bool IPv6Addr; ///< True if the host was parsed as an IPv6 address; it is then written between brackets
};
}// namespace HTTP }// namespace HTTP

267
lib/url.cpp Normal file
View file

@ -0,0 +1,267 @@
/// \file url.cpp
/// Holds all code for the HTTP::URL class.
#include "url.h"
#include "defines.h"
#include "encode.h"
/// Reports whether the given c-string consists solely of ASCII decimal digits.
/// An empty string has no offending characters and therefore counts as numeric.
static bool is_numeric(const char *str){
  for (const char *cur = str; *cur != '\0'; ++cur){
    const bool isDigit = (*cur >= '0' && *cur <= '9');
    if (!isDigit){return false;}
  }
  return true;
}
/// Constructor that does the actual parsing.
/// Splits the given URL string into protocol, user, pass, host, port, path, args and frag.
/// Input may start with "protocol://", with "//" (no protocol), or directly with the host.
/// The path is normalized ("/./", "//" and "/../" are collapsed) and then URL-decoded;
/// user, pass and frag are URL-decoded as well, while args is stored exactly as given.
/// \param url The URL string to parse.
HTTP::URL::URL(const std::string &url){
IPv6Addr = false;
// first detect protocol at the start, if any
size_t proto_sep = url.find("://");
if (proto_sep != std::string::npos){
protocol = url.substr(0, proto_sep);
proto_sep += 3;
}else{
proto_sep = 0;
// No protocol: skip a leading "//" if present, so the host starts right after it
if (url.substr(0, 2) == "//"){proto_sep = 2;}
}
// proto_sep now points to the start of the host, guaranteed
// continue by finding the path, if any
size_t first_slash = url.find_first_of("/?#", proto_sep);
if (first_slash != std::string::npos){
// Drop the leading slash from the path, but keep a leading '?' or '#' so the
// fragment/argument splitting below can still find it
if (url[first_slash] == '/'){
path = url.substr(first_slash + 1);
}else{
path = url.substr(first_slash);
}
// Split off the fragment first, so a '?' inside the fragment is not mistaken for arguments
size_t hmark = path.find('#');
if (hmark != std::string::npos){
frag = Encodings::URL::decode(path.substr(hmark + 1));
path.erase(hmark);
}
// Everything after the first '?' becomes the arguments; these are kept undecoded
size_t qmark = path.find('?');
if (qmark != std::string::npos){
args = path.substr(qmark + 1);
path.erase(qmark);
}
if (path.size()){
if (path[0] == '/'){path.erase(0, 1);}
// Collapse "/./" into "/"
size_t dots = path.find("/./");
while (dots != std::string::npos){
DONTEVEN_MSG("%s (/./ -> /)", path.c_str());
path.erase(dots, 2);
dots = path.find("/./");
}
// Collapse repeated slashes into a single slash
dots = path.find("//");
while (dots != std::string::npos){
DONTEVEN_MSG("%s (// -> /)", path.c_str());
path.erase(dots, 1);
dots = path.find("//");
}
// The collapsing above may have left a leading slash behind; remove it again
if (path[0] == '/'){path.erase(0, 1);}
// Resolve "/../" by removing it together with the path segment in front of it
dots = path.find("/../");
while (dots != std::string::npos){
// If dots is 0, dots - 1 wraps around to npos and rfind searches the whole string;
// the explicit dots == 0 check below handles that case
size_t prevslash = path.rfind('/', dots - 1);
if (prevslash == std::string::npos || dots == 0){
// No earlier segment boundary: drop everything up to and including the "/../"
path.erase(0, dots + 4);
}else{
// Erase from just after the previous slash through the end of the "/../"
path.erase(prevslash + 1, dots - prevslash + 3);
}
dots = path.find("/../");
}
// A leading "./" or "../" cannot be resolved any further, so it is simply dropped
if (path.substr(0, 2) == "./"){path.erase(0, 2);}
if (path.substr(0, 3) == "../"){path.erase(0, 3);}
// Decode only after normalization, so encoded slashes/dots do not affect the steps above
path = Encodings::URL::decode(path);
}
}
// user, pass, host and port are now definitely between proto_sep and first_slash
// (if first_slash is npos the length is huge and substr simply takes the rest of the string)
std::string uphp = url.substr(proto_sep, first_slash - proto_sep); // user+pass+host+port
// Check if we have a user/pass before the host
size_t at_sign = uphp.find('@');
if (at_sign != std::string::npos){
std::string creds = uphp.substr(0, at_sign);
uphp.erase(0, at_sign + 1);
// Split on the first colon only, so the password itself may contain colons
size_t colon = creds.find(':');
if (colon != std::string::npos){
user = Encodings::URL::decode(creds.substr(0, colon));
pass = Encodings::URL::decode(creds.substr(colon + 1));
}else{
user = Encodings::URL::decode(creds);
}
}
// we check for [ at the start because we may have an IPv6 address as host
if (uphp[0] == '['){
// IPv6 address - find matching brace
IPv6Addr = true;
size_t closing_brace = uphp.find(']');
// If there is no closing brace, closing_brace is npos: substr then takes the rest of the
// string as host and the port search below finds nothing
host = uphp.substr(1, closing_brace - 1);
// continue by finding port, if any
size_t colon = uphp.find(':', closing_brace);
if (colon == std::string::npos){
// no port. Assume default
port = "";
}else{
// we have a port number, read it
port = uphp.substr(colon + 1);
// Text after the colon that is not a number is not a port: append it to the host instead
if (!is_numeric(port.c_str())){
host += ":" + port;
port = "";
}
}
}else{
//"normal" host - first find port, if any
size_t colon = uphp.rfind(':');
if (colon == std::string::npos){
// no port. Assume default
port = "";
host = uphp;
}else{
// we have a port number, read it
port = uphp.substr(colon + 1);
host = uphp.substr(0, colon);
// A non-numeric tail after the last colon means this was not a port after all:
// treat the whole thing as an IPv6 address written without brackets
if (!is_numeric(port.c_str())){
IPv6Addr = true;
host += ":" + port;
port = "";
}
}
}
// if the host is numeric, assume it is a port, instead
if (host.size() && is_numeric(host.c_str())){
port = host;
host = "";
}
EXTREME_MSG("URL: %s", getUrl().c_str());
}
/// Returns the port as a number.
/// When no explicit port is set, the default port for the protocol is returned instead.
uint32_t HTTP::URL::getPort() const{
  if (port.empty()){return getDefaultPort();}
  const int parsedPort = atoi(port.c_str());
  return parsedPort;
}
/// Looks up the default port number belonging to the protocol of this URL.
/// Returns 0 when the protocol is empty or not one of the known protocols.
uint32_t HTTP::URL::getDefaultPort() const{
  static const struct{
    const char *name;
    uint32_t number;
  }knownPorts[] ={{"http", 80},   {"https", 443}, {"rtmp", 1935},
                   {"rtmps", 443}, {"dtsc", 4200}, {"rtsp", 554}};
  const size_t knownCount = sizeof(knownPorts) / sizeof(knownPorts[0]);
  for (size_t i = 0; i < knownCount; ++i){
    if (protocol == knownPorts[i].name){return knownPorts[i].number;}
  }
  return 0;
}
/// Returns the file extension of the URL, or an empty string if none.
/// The extension is everything after the last '.' in the final path segment.
/// A '.' that belongs to a directory name (for example "dir.v2/file") is not treated
/// as an extension separator, so such paths yield an empty string.
/// \return The extension without the leading dot, or "" when the last segment has no dot.
std::string HTTP::URL::getExt() const{
  const size_t lastDot = path.rfind('.');
  if (lastDot == std::string::npos){return "";}
  // Only accept the dot if it lies inside the last path segment
  const size_t lastSlash = path.rfind('/');
  if (lastSlash != std::string::npos && lastSlash > lastDot){return "";}
  return path.substr(lastDot + 1);
}
/// Serializes all components back into a complete URL string:
/// protocol, credentials, host, port (only when set and not the default), path, args and frag.
std::string HTTP::URL::getUrl() const{
  std::string out = protocol.size() ? protocol + "://" : std::string("//");
  const bool hasCredentials = user.size() || pass.size();
  if (hasCredentials){
    out += Encodings::URL::encode(user);
    out += ":";
    out += Encodings::URL::encode(pass);
    out += "@";
  }
  // IPv6 hosts are wrapped in brackets so the colons are not mistaken for a port separator
  if (IPv6Addr){
    out += "[";
    out += host;
    out += "]";
  }else{
    out += host;
  }
  if (port.size()){
    if (getPort() != getDefaultPort()){
      out += ":";
      out += port;
    }
  }
  out += "/";
  if (!path.empty()){out += Encodings::URL::encode(path, "/:=@[]");}
  if (!args.empty()){
    out += "?";
    out += args;
  }
  if (!frag.empty()){
    out += "#";
    out += Encodings::URL::encode(frag, "/:=@[]#?&");
  }
  return out;
}
/// Returns the path with a leading slash prepended, for use when this is a local file URI.
std::string HTTP::URL::getFilePath() const{
  std::string filePath("/");
  filePath += path;
  return filePath;
}
/// Serializes the URL without credentials and without fragment:
/// protocol, host, port (only when set and not the default), path and args.
std::string HTTP::URL::getProxyUrl() const{
  std::string out;
  if (protocol.empty()){
    out = "//";
  }else{
    out = protocol + "://";
  }
  // IPv6 hosts are wrapped in brackets so the colons are not mistaken for a port separator
  if (IPv6Addr){
    out += "[";
    out += host;
    out += "]";
  }else{
    out += host;
  }
  if (port.size()){
    if (getPort() != getDefaultPort()){
      out += ":";
      out += port;
    }
  }
  out += "/";
  if (!path.empty()){out += Encodings::URL::encode(path, "/:=@[]");}
  if (!args.empty()){
    out += "?";
    out += args;
  }
  return out;
}
/// Serializes the URL without args and without fragment:
/// protocol, credentials, host, port (only when set and not the default) and path.
std::string HTTP::URL::getBareUrl() const{
  std::string out = protocol.size() ? protocol + "://" : std::string("//");
  const bool hasCredentials = user.size() || pass.size();
  if (hasCredentials){
    out += Encodings::URL::encode(user);
    out += ":";
    out += Encodings::URL::encode(pass);
    out += "@";
  }
  // IPv6 hosts are wrapped in brackets so the colons are not mistaken for a port separator
  if (IPv6Addr){
    out += "[";
    out += host;
    out += "]";
  }else{
    out += host;
  }
  if (port.size()){
    if (getPort() != getDefaultPort()){
      out += ":";
      out += port;
    }
  }
  out += "/";
  if (!path.empty()){out += Encodings::URL::encode(path, "/:=@[]");}
  return out;
}
/// Returns a URL object for the given link, resolved relative to the current URL object.
/// Three kinds of link are recognized:
///  - Full links ("proto://..."), which are parsed on their own.
///  - Absolute links: "//host/..." reuses only this URL's protocol, while "/path" reuses
///    this URL's protocol, credentials, host and port.
///  - Anything else is treated as relative to the directory of this URL's path.
/// \param l The link to resolve.
/// \return The resolved URL.
HTTP::URL HTTP::URL::link(const std::string &l) const{
  // Full link: a "://" that appears before any other slash.
  // The slash check used to read l.find('/' != std::string::npos), which searched for
  // the character '\x01' because of a misplaced parenthesis.
  const size_t firstSlash = l.find('/');
  if (firstSlash != std::string::npos && l.find("://") < firstSlash){
    DONTEVEN_MSG("Full link: %s", l.c_str());
    return URL(l);
  }
  // Absolute link
  if (l[0] == '/'){
    DONTEVEN_MSG("Absolute link: %s", l.c_str());
    if (l.size() > 1 && l[1] == '/'){
      // Same-protocol full link
      return URL(protocol + ":" + l);
    }else{
      // Same-domain/port absolute link
      URL tmp = *this;
      tmp.args.clear();
      // The fragment belongs to the base document, not to the link target; the relative
      // branch below drops it too (getBareUrl() omits it), so drop it here as well.
      tmp.frag.clear();
      tmp.path = l.substr(1);
      // Abuse the fact that we don't check for arguments in getUrl()
      return URL(tmp.getUrl());
    }
  }
  // Relative link: replace everything after the last slash of the bare URL with the link
  std::string tmpUrl = getBareUrl();
  size_t slashPos = tmpUrl.rfind('/');
  if (slashPos == std::string::npos){
    tmpUrl += "/";
  }else{
    tmpUrl.erase(slashPos + 1);
  }
  DONTEVEN_MSG("Relative link: %s+%s", tmpUrl.c_str(), l.c_str());
  return URL(tmpUrl + l);
}

35
lib/url.h Normal file
View file

@ -0,0 +1,35 @@
/// \file url.h
/// Holds all headers for the HTTP::URL class.
#pragma once
#include <stdint.h>
#include <stdlib.h>
#include <string>
/// Holds all HTTP processing related code.
namespace HTTP{
/// URL parsing class. Parses full URL into its subcomponents
/// The parsed components are public members and can be read or changed directly;
/// the get*Url() methods build a URL string from their current values.
class URL{
public:
/// Parses the given URL string into the public members below.
URL(const std::string &url = "");
/// Returns the port as a number, or the protocol's default port if no port is set.
uint32_t getPort() const;
/// Returns the default port for the protocol, or 0 if the protocol is not a known one.
uint32_t getDefaultPort() const;
/// Returns the file extension of the path, or an empty string if none.
std::string getExt() const;
/// Returns the full URL in string format.
std::string getUrl() const;
/// Returns the path with a leading slash, for use as a local file path.
std::string getFilePath() const;
/// Returns the URL in string format without args and frag.
std::string getBareUrl() const;
/// Returns the URL in string format without auth and frag.
std::string getProxyUrl() const;
std::string host; ///< Hostname or IP address of URL
std::string protocol; ///< Protocol of URL
std::string port; ///< Port of URL
std::string path; ///< Path after the first slash (not inclusive) but before any question mark
std::string args; ///< Everything after the question mark in the path, if it was present
std::string frag; ///< Everything after the # in the path, if it was present
std::string user; ///< Username, if it was present
std::string pass; ///< Password, if it was present
/// Returns a URL object for the given link, resolved relative to this URL.
URL link(const std::string &l) const;
bool IPv6Addr; ///< True if the host was parsed as an IPv6 address; it is then written between brackets
};
}// namespace HTTP

View file

@ -3,6 +3,7 @@
#include <mist/stream.h> #include <mist/stream.h>
#include <mist/encode.h> #include <mist/encode.h>
#include <mist/langcodes.h> #include <mist/langcodes.h>
#include <mist/url.h>
#include "flashPlayer.h" #include "flashPlayer.h"
#include "oldFlashPlayer.h" #include "oldFlashPlayer.h"
#include <mist/websocket.h> #include <mist/websocket.h>