From 793f6f78094663e3c353cfce9978501a6bae46cd Mon Sep 17 00:00:00 2001 From: Thulinma Date: Fri, 28 Feb 2020 11:33:12 +0100 Subject: [PATCH] Split HTTP namespace up into http_parser.h and url.h, since the URL library can be useful stand-alone as well. --- CMakeLists.txt | 2 + lib/downloader.h | 1 + lib/http_parser.cpp | 261 +-------------------------- lib/http_parser.h | 23 --- lib/url.cpp | 267 ++++++++++++++++++++++++++++ lib/url.h | 35 ++++ src/output/output_http_internal.cpp | 1 + 7 files changed, 307 insertions(+), 283 deletions(-) create mode 100644 lib/url.cpp create mode 100644 lib/url.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 565252fd..c4bc71da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,6 +138,7 @@ set(libHeaders lib/ebml.h lib/ebml_socketglue.h lib/websocket.h + lib/url.h ) ######################################## @@ -181,6 +182,7 @@ add_library (mist lib/ebml.cpp lib/ebml_socketglue.cpp lib/websocket.cpp + lib/url.cpp ) if (NOT APPLE) set (LIBRT -lrt) diff --git a/lib/downloader.h b/lib/downloader.h index 562497ad..35584b42 100644 --- a/lib/downloader.h +++ b/lib/downloader.h @@ -1,4 +1,5 @@ #include "http_parser.h" +#include "url.h" #include "socket.h" namespace HTTP{ diff --git a/lib/http_parser.cpp b/lib/http_parser.cpp index e9fe47b3..283b6c67 100644 --- a/lib/http_parser.cpp +++ b/lib/http_parser.cpp @@ -7,268 +7,9 @@ #include "defines.h" #include "encode.h" #include "timing.h" +#include "url.h" #include -/// Helper function to check if the given c-string is numeric or not -static bool is_numeric(const char *str){ - while (str[0] != 0){ - if (str[0] < 48 || str[0] > 57){return false;} - ++str; - } - return true; -} - -/// Constructor that does the actual parsing -HTTP::URL::URL(const std::string &url){ - IPv6Addr = false; - // first detect protocol at the start, if any - size_t proto_sep = url.find("://"); - if (proto_sep != std::string::npos){ - protocol = url.substr(0, proto_sep); - proto_sep += 3; - }else{ - proto_sep = 
0; - if (url.substr(0, 2) == "//"){proto_sep = 2;} - } - // proto_sep now points to the start of the host, guaranteed - // continue by finding the path, if any - size_t first_slash = url.find_first_of("/?#", proto_sep); - if (first_slash != std::string::npos){ - if (url[first_slash] == '/'){ - path = url.substr(first_slash + 1); - }else{ - path = url.substr(first_slash); - } - size_t hmark = path.find('#'); - if (hmark != std::string::npos){ - frag = Encodings::URL::decode(path.substr(hmark + 1)); - path.erase(hmark); - } - size_t qmark = path.find('?'); - if (qmark != std::string::npos){ - args = path.substr(qmark + 1); - path.erase(qmark); - } - if (path.size()){ - if (path[0] == '/'){path.erase(0, 1);} - size_t dots = path.find("/./"); - while (dots != std::string::npos){ - DONTEVEN_MSG("%s (/./ -> /)", path.c_str()); - path.erase(dots, 2); - dots = path.find("/./"); - } - dots = path.find("//"); - while (dots != std::string::npos){ - DONTEVEN_MSG("%s (// -> /)", path.c_str()); - path.erase(dots, 1); - dots = path.find("//"); - } - if (path[0] == '/'){path.erase(0, 1);} - dots = path.find("/../"); - while (dots != std::string::npos){ - size_t prevslash = path.rfind('/', dots - 1); - if (prevslash == std::string::npos || dots == 0){ - path.erase(0, dots + 4); - }else{ - path.erase(prevslash + 1, dots - prevslash + 3); - } - dots = path.find("/../"); - } - if (path.substr(0, 2) == "./"){path.erase(0, 2);} - if (path.substr(0, 3) == "../"){path.erase(0, 3);} - path = Encodings::URL::decode(path); - } - } - // user, pass, host and port are now definitely between proto_sep and first_slash - std::string uphp = url.substr(proto_sep, first_slash - proto_sep); // user+pass+host+port - // Check if we have a user/pass before the host - size_t at_sign = uphp.find('@'); - if (at_sign != std::string::npos){ - std::string creds = uphp.substr(0, at_sign); - uphp.erase(0, at_sign + 1); - size_t colon = creds.find(':'); - if (colon != std::string::npos){ - user = 
Encodings::URL::decode(creds.substr(0, colon)); - pass = Encodings::URL::decode(creds.substr(colon + 1)); - }else{ - user = Encodings::URL::decode(creds); - } - } - // we check for [ at the start because we may have an IPv6 address as host - if (uphp[0] == '['){ - // IPv6 address - find matching brace - IPv6Addr = true; - size_t closing_brace = uphp.find(']'); - host = uphp.substr(1, closing_brace - 1); - // continue by finding port, if any - size_t colon = uphp.find(':', closing_brace); - if (colon == std::string::npos){ - // no port. Assume default - port = ""; - }else{ - // we have a port number, read it - port = uphp.substr(colon + 1); - if (!is_numeric(port.c_str())){ - host += ":" + port; - port = ""; - } - } - }else{ - //"normal" host - first find port, if any - size_t colon = uphp.rfind(':'); - if (colon == std::string::npos){ - // no port. Assume default - port = ""; - host = uphp; - }else{ - // we have a port number, read it - port = uphp.substr(colon + 1); - host = uphp.substr(0, colon); - if (!is_numeric(port.c_str())){ - IPv6Addr = true; - host += ":" + port; - port = ""; - } - } - } - // if the host is numeric, assume it is a port, instead - if (host.size() && is_numeric(host.c_str())){ - port = host; - host = ""; - } - EXTREME_MSG("URL: %s", getUrl().c_str()); -} - -/// Returns the port in numeric format -uint32_t HTTP::URL::getPort() const{ - if (!port.size()){return getDefaultPort();} - return atoi(port.c_str()); -} - -/// Returns the default port for the protocol in numeric format -uint32_t HTTP::URL::getDefaultPort() const{ - if (protocol == "http"){return 80;} - if (protocol == "https"){return 443;} - if (protocol == "rtmp"){return 1935;} - if (protocol == "rtmps"){return 443;} - if (protocol == "dtsc"){return 4200;} - if (protocol == "rtsp"){return 554;} - return 0; -} - -/// Returns the file extension of the URL, or an empty string if none. 
-std::string HTTP::URL::getExt() const{ - if (path.rfind('.') == std::string::npos){ - return ""; - } - return path.substr(path.rfind('.')+1); -} - -/// Returns the full URL in string format -std::string HTTP::URL::getUrl() const{ - std::string ret; - if (protocol.size()){ - ret = protocol + "://"; - }else{ - ret = "//"; - } - if (user.size() || pass.size()){ - ret += Encodings::URL::encode(user) + ":" + Encodings::URL::encode(pass) + "@"; - } - if (IPv6Addr){ - ret += "[" + host + "]"; - }else{ - ret += host; - } - if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} - ret += "/"; - if (path.size()){ret += Encodings::URL::encode(path, "/:=@[]");} - if (args.size()){ret += "?" + args;} - if (frag.size()){ret += "#" + Encodings::URL::encode(frag, "/:=@[]#?&");} - return ret; -} - -/// Returns the full file path, in case this is a local file URI -std::string HTTP::URL::getFilePath() const{ - return "/"+path; -} - -/// Returns the URL in string format without auth and frag -std::string HTTP::URL::getProxyUrl() const{ - std::string ret; - if (protocol.size()){ - ret = protocol + "://"; - }else{ - ret = "//"; - } - if (IPv6Addr){ - ret += "[" + host + "]"; - }else{ - ret += host; - } - if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} - ret += "/"; - if (path.size()){ret += Encodings::URL::encode(path, "/:=@[]");} - if (args.size()){ret += "?" 
+ args;} - return ret; -} - -/// Returns the URL in string format without args and frag -std::string HTTP::URL::getBareUrl() const{ - std::string ret; - if (protocol.size()){ - ret = protocol + "://"; - }else{ - ret = "//"; - } - if (user.size() || pass.size()){ - ret += Encodings::URL::encode(user) + ":" + Encodings::URL::encode(pass) + "@"; - } - if (IPv6Addr){ - ret += "[" + host + "]"; - }else{ - ret += host; - } - if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} - ret += "/"; - if (path.size()){ret += Encodings::URL::encode(path, "/:=@[]");} - return ret; -} - -/// Returns a URL object for the given link, resolved relative to the current URL object. -HTTP::URL HTTP::URL::link(const std::string &l) const{ - // Full link - if (l.find("://") < l.find('/') && l.find('/' != std::string::npos)){ - DONTEVEN_MSG("Full link: %s", l.c_str()); - return URL(l); - } - // Absolute link - if (l[0] == '/'){ - DONTEVEN_MSG("Absolute link: %s", l.c_str()); - if (l.size() > 1 && l[1] == '/'){ - // Same-protocol full link - return URL(protocol + ":" + l); - }else{ - // Same-domain/port absolute link - URL tmp = *this; - tmp.args.clear(); - tmp.path = l.substr(1); - // Abuse the fact that we don't check for arguments in getUrl() - return URL(tmp.getUrl()); - } - } - // Relative link - std::string tmpUrl = getBareUrl(); - size_t slashPos = tmpUrl.rfind('/'); - if (slashPos == std::string::npos){ - tmpUrl += "/"; - }else{ - tmpUrl.erase(slashPos + 1); - } - DONTEVEN_MSG("Relative link: %s+%s", tmpUrl.c_str(), l.c_str()); - return URL(tmpUrl + l); -} - /// This constructor creates an empty HTTP::Parser, ready for use for either reading or writing. /// All this constructor does is call HTTP::Parser::Clean(). HTTP::Parser::Parser(){ diff --git a/lib/http_parser.h b/lib/http_parser.h index 9172eae6..302437cb 100644 --- a/lib/http_parser.h +++ b/lib/http_parser.h @@ -73,28 +73,5 @@ namespace HTTP{ void Trim(std::string &s); }; - /// URL parsing class. 
Parses full URL into its subcomponents - class URL{ - public: - URL(const std::string &url = ""); - uint32_t getPort() const; - uint32_t getDefaultPort() const; - std::string getExt() const; - std::string getUrl() const; - std::string getFilePath() const; - std::string getBareUrl() const; - std::string getProxyUrl() const; - std::string host; ///< Hostname or IP address of URL - std::string protocol; ///< Protocol of URL - std::string port; ///< Port of URL - std::string path; ///< Path after the first slash (not inclusive) but before any question mark - std::string args; ///< Everything after the question mark in the path, if it was present - std::string frag; ///< Everything after the # in the path, if it was present - std::string user; ///< Username, if it was present - std::string pass; ///< Password, if it was present - URL link(const std::string &l) const; - bool IPv6Addr; - }; - }// namespace HTTP diff --git a/lib/url.cpp b/lib/url.cpp new file mode 100644 index 00000000..43e5c945 --- /dev/null +++ b/lib/url.cpp @@ -0,0 +1,267 @@ +/// \file url.cpp +/// Holds all code for the HTTP::URL class.
+ +#include "url.h" +#include "defines.h" +#include "encode.h" + +/// Helper function to check if the given c-string is numeric or not +static bool is_numeric(const char *str){ + while (str[0] != 0){ + if (str[0] < 48 || str[0] > 57){return false;} + ++str; + } + return true; +} + +/// Constructor that does the actual parsing +HTTP::URL::URL(const std::string &url){ + IPv6Addr = false; + // first detect protocol at the start, if any + size_t proto_sep = url.find("://"); + if (proto_sep != std::string::npos){ + protocol = url.substr(0, proto_sep); + proto_sep += 3; + }else{ + proto_sep = 0; + if (url.substr(0, 2) == "//"){proto_sep = 2;} + } + // proto_sep now points to the start of the host, guaranteed + // continue by finding the path, if any + size_t first_slash = url.find_first_of("/?#", proto_sep); + if (first_slash != std::string::npos){ + if (url[first_slash] == '/'){ + path = url.substr(first_slash + 1); + }else{ + path = url.substr(first_slash); + } + size_t hmark = path.find('#'); + if (hmark != std::string::npos){ + frag = Encodings::URL::decode(path.substr(hmark + 1)); + path.erase(hmark); + } + size_t qmark = path.find('?'); + if (qmark != std::string::npos){ + args = path.substr(qmark + 1); + path.erase(qmark); + } + if (path.size()){ + if (path[0] == '/'){path.erase(0, 1);} + size_t dots = path.find("/./"); + while (dots != std::string::npos){ + DONTEVEN_MSG("%s (/./ -> /)", path.c_str()); + path.erase(dots, 2); + dots = path.find("/./"); + } + dots = path.find("//"); + while (dots != std::string::npos){ + DONTEVEN_MSG("%s (// -> /)", path.c_str()); + path.erase(dots, 1); + dots = path.find("//"); + } + if (path[0] == '/'){path.erase(0, 1);} + dots = path.find("/../"); + while (dots != std::string::npos){ + size_t prevslash = path.rfind('/', dots - 1); + if (prevslash == std::string::npos || dots == 0){ + path.erase(0, dots + 4); + }else{ + path.erase(prevslash + 1, dots - prevslash + 3); + } + dots = path.find("/../"); + } + if (path.substr(0, 2) == 
"./"){path.erase(0, 2);} + if (path.substr(0, 3) == "../"){path.erase(0, 3);} + path = Encodings::URL::decode(path); + } + } + // user, pass, host and port are now definitely between proto_sep and first_slash + std::string uphp = url.substr(proto_sep, first_slash - proto_sep); // user+pass+host+port + // Check if we have a user/pass before the host + size_t at_sign = uphp.find('@'); + if (at_sign != std::string::npos){ + std::string creds = uphp.substr(0, at_sign); + uphp.erase(0, at_sign + 1); + size_t colon = creds.find(':'); + if (colon != std::string::npos){ + user = Encodings::URL::decode(creds.substr(0, colon)); + pass = Encodings::URL::decode(creds.substr(colon + 1)); + }else{ + user = Encodings::URL::decode(creds); + } + } + // we check for [ at the start because we may have an IPv6 address as host + if (uphp[0] == '['){ + // IPv6 address - find matching brace + IPv6Addr = true; + size_t closing_brace = uphp.find(']'); + host = uphp.substr(1, closing_brace - 1); + // continue by finding port, if any + size_t colon = uphp.find(':', closing_brace); + if (colon == std::string::npos){ + // no port. Assume default + port = ""; + }else{ + // we have a port number, read it + port = uphp.substr(colon + 1); + if (!is_numeric(port.c_str())){ + host += ":" + port; + port = ""; + } + } + }else{ + //"normal" host - first find port, if any + size_t colon = uphp.rfind(':'); + if (colon == std::string::npos){ + // no port. 
Assume default + port = ""; + host = uphp; + }else{ + // we have a port number, read it + port = uphp.substr(colon + 1); + host = uphp.substr(0, colon); + if (!is_numeric(port.c_str())){ + IPv6Addr = true; + host += ":" + port; + port = ""; + } + } + } + // if the host is numeric, assume it is a port, instead + if (host.size() && is_numeric(host.c_str())){ + port = host; + host = ""; + } + EXTREME_MSG("URL: %s", getUrl().c_str()); +} + +/// Returns the port in numeric format +uint32_t HTTP::URL::getPort() const{ + if (!port.size()){return getDefaultPort();} + return atoi(port.c_str()); +} + +/// Returns the default port for the protocol in numeric format +uint32_t HTTP::URL::getDefaultPort() const{ + if (protocol == "http"){return 80;} + if (protocol == "https"){return 443;} + if (protocol == "rtmp"){return 1935;} + if (protocol == "rtmps"){return 443;} + if (protocol == "dtsc"){return 4200;} + if (protocol == "rtsp"){return 554;} + return 0; +} + +/// Returns the file extension of the URL, or an empty string if none. +std::string HTTP::URL::getExt() const{ + if (path.rfind('.') == std::string::npos){ + return ""; + } + return path.substr(path.rfind('.')+1); +} + +/// Returns the full URL in string format +std::string HTTP::URL::getUrl() const{ + std::string ret; + if (protocol.size()){ + ret = protocol + "://"; + }else{ + ret = "//"; + } + if (user.size() || pass.size()){ + ret += Encodings::URL::encode(user) + ":" + Encodings::URL::encode(pass) + "@"; + } + if (IPv6Addr){ + ret += "[" + host + "]"; + }else{ + ret += host; + } + if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} + ret += "/"; + if (path.size()){ret += Encodings::URL::encode(path, "/:=@[]");} + if (args.size()){ret += "?" 
+ args;} + if (frag.size()){ret += "#" + Encodings::URL::encode(frag, "/:=@[]#?&");} + return ret; +} + +/// Returns the full file path, in case this is a local file URI +std::string HTTP::URL::getFilePath() const{ + return "/"+path; +} + +/// Returns the URL in string format without auth and frag +std::string HTTP::URL::getProxyUrl() const{ + std::string ret; + if (protocol.size()){ + ret = protocol + "://"; + }else{ + ret = "//"; + } + if (IPv6Addr){ + ret += "[" + host + "]"; + }else{ + ret += host; + } + if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} + ret += "/"; + if (path.size()){ret += Encodings::URL::encode(path, "/:=@[]");} + if (args.size()){ret += "?" + args;} + return ret; +} + +/// Returns the URL in string format without args and frag +std::string HTTP::URL::getBareUrl() const{ + std::string ret; + if (protocol.size()){ + ret = protocol + "://"; + }else{ + ret = "//"; + } + if (user.size() || pass.size()){ + ret += Encodings::URL::encode(user) + ":" + Encodings::URL::encode(pass) + "@"; + } + if (IPv6Addr){ + ret += "[" + host + "]"; + }else{ + ret += host; + } + if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} + ret += "/"; + if (path.size()){ret += Encodings::URL::encode(path, "/:=@[]");} + return ret; +} + +/// Returns a URL object for the given link, resolved relative to the current URL object. 
+HTTP::URL HTTP::URL::link(const std::string &l) const{ + // Full link: "://" occurs before the first '/' + if (l.find("://") < l.find('/') && l.find('/') != std::string::npos){ + DONTEVEN_MSG("Full link: %s", l.c_str()); + return URL(l); + } + // Absolute link + if (l[0] == '/'){ + DONTEVEN_MSG("Absolute link: %s", l.c_str()); + if (l.size() > 1 && l[1] == '/'){ + // Same-protocol full link + return URL(protocol + ":" + l); + }else{ + // Same-domain/port absolute link + URL tmp = *this; + tmp.args.clear(); + tmp.path = l.substr(1); + // Abuse the fact that we don't check for arguments in getUrl() + return URL(tmp.getUrl()); + } + } + // Relative link + std::string tmpUrl = getBareUrl(); + size_t slashPos = tmpUrl.rfind('/'); + if (slashPos == std::string::npos){ + tmpUrl += "/"; + }else{ + tmpUrl.erase(slashPos + 1); + } + DONTEVEN_MSG("Relative link: %s+%s", tmpUrl.c_str(), l.c_str()); + return URL(tmpUrl + l); +} + diff --git a/lib/url.h b/lib/url.h new file mode 100644 index 00000000..0b12dcd7 --- /dev/null +++ b/lib/url.h @@ -0,0 +1,35 @@ +/// \file url.h +/// Holds all headers for the HTTP::URL class. + +#pragma once +#include <stdint.h> +#include <string> + +/// Holds all HTTP processing related code. +namespace HTTP{ + + /// URL parsing class. 
Parses full URL into its subcomponents + class URL{ + public: + URL(const std::string &url = ""); + uint32_t getPort() const; + uint32_t getDefaultPort() const; + std::string getExt() const; + std::string getUrl() const; + std::string getFilePath() const; + std::string getBareUrl() const; + std::string getProxyUrl() const; + std::string host; ///< Hostname or IP address of URL + std::string protocol; ///< Protocol of URL + std::string port; ///< Port of URL + std::string path; ///< Path after the first slash (not inclusive) but before any question mark + std::string args; ///< Everything after the question mark in the path, if it was present + std::string frag; ///< Everything after the # in the path, if it was present + std::string user; ///< Username, if it was present + std::string pass; ///< Password, if it was present + URL link(const std::string &l) const; + bool IPv6Addr; + }; + +}// namespace HTTP + diff --git a/src/output/output_http_internal.cpp b/src/output/output_http_internal.cpp index bdc1ddbb..d09df3ab 100644 --- a/src/output/output_http_internal.cpp +++ b/src/output/output_http_internal.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include "flashPlayer.h" #include "oldFlashPlayer.h" #include