diff --git a/lib/encode.cpp b/lib/encode.cpp index 41609a6f..5e9116b7 100644 --- a/lib/encode.cpp +++ b/lib/encode.cpp @@ -1,93 +1,71 @@ #include "encode.h" -namespace Encodings { +namespace Encodings{ /// Needed for base64_encode function - const std::string Base64::chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + const std::string Base64::chars = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; /// Helper for base64_decode function - inline bool Base64::is_base64(unsigned char c) { + inline bool Base64::is_base64(unsigned char c){ return (isalnum(c) || (c == '+') || (c == '/')); } - /// Used to base64 encode data. Input is the plaintext as std::string, output is the encoded data as std::string. - /// \param input Plaintext data to encode. - /// \returns Base64 encoded data. - std::string Base64::encode(std::string const input) { + /// Used to base64 encode data. Input is the plaintext as std::string, output is the encoded data + /// as std::string. \param input Plaintext data to encode. \returns Base64 encoded data. + std::string Base64::encode(std::string const input){ std::string ret; unsigned int in_len = input.size(); char quad[4], triple[3]; unsigned int i, x, n = 3; - for (x = 0; x < in_len; x = x + 3) { - if ((in_len - x) / 3 == 0) { - n = (in_len - x) % 3; - } - for (i = 0; i < 3; i++) { - triple[i] = '0'; - } - for (i = 0; i < n; i++) { - triple[i] = input[x + i]; - } - quad[0] = chars[(triple[0] & 0xFC) >> 2]; // FC = 11111100 + for (x = 0; x < in_len; x = x + 3){ + if ((in_len - x) / 3 == 0){n = (in_len - x) % 3;} + for (i = 0; i < 3; i++){triple[i] = '0';} + for (i = 0; i < n; i++){triple[i] = input[x + i];} + quad[0] = chars[(triple[0] & 0xFC) >> 2]; // FC = 11111100 quad[1] = chars[((triple[0] & 0x03) << 4) | ((triple[1] & 0xF0) >> 4)]; // 03 = 11 quad[2] = chars[((triple[1] & 0x0F) << 2) | ((triple[2] & 0xC0) >> 6)]; // 0F = 1111, C0=11110 - quad[3] = chars[triple[2] & 0x3F]; // 3F = 111111 - if (n < 3) { - quad[3] = '='; - } - if (n < 2) { - quad[2] = '='; - } - for (i = 0; i < 4; i++) { - ret += quad[i]; - } + quad[3] = chars[triple[2] & 0x3F]; // 3F = 111111 + if (n < 3){quad[3] = '=';} + if (n < 2){quad[2] = '=';} + for (i = 0; i < 4; i++){ret += quad[i];} } return ret; - } //base64_encode + }// base64_encode - /// Used to base64 decode data. Input is the encoded data as std::string, output is the plaintext data as std::string. - /// \param encoded_string Base64 encoded data to decode. - /// \returns Plaintext decoded data. - std::string Base64::decode(std::string const & encoded_string) { + /// Used to base64 decode data. Input is the encoded data as std::string, output is the plaintext + /// data as std::string. \param encoded_string Base64 encoded data to decode. \returns Plaintext + /// decoded data. + std::string Base64::decode(std::string const &encoded_string){ int in_len = encoded_string.size(); int i = 0; int j = 0; int in_ = 0; unsigned char char_array_4[4], char_array_3[3]; std::string ret; - while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])) { - char_array_4[i++ ] = encoded_string[in_]; + while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_])){ + char_array_4[i++] = encoded_string[in_]; in_++; - if (i == 4) { - for (i = 0; i < 4; i++) { - char_array_4[i] = chars.find(char_array_4[i]); - } + if (i == 4){ + for (i = 0; i < 4; i++){char_array_4[i] = chars.find(char_array_4[i]);} char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; - for (i = 0; (i < 3); i++) { - ret += char_array_3[i]; - } + for (i = 0; (i < 3); i++){ret += char_array_3[i];} i = 0; } } - if (i) { - for (j = i; j < 4; j++) { - char_array_4[j] = 0; - } - for (j = 0; j < 4; j++) { - char_array_4[j] = chars.find(char_array_4[j]); - } + if (i){ + for (j = i; j < 4; j++){char_array_4[j] = 0;} + for (j = 0; j < 4; j++){char_array_4[j] = chars.find(char_array_4[j]);} char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4); char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2); char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3]; - for (j = 0; (j < i - 1); j++) - ret += char_array_3[j]; + for (j = 0; (j < i - 1); j++) ret += char_array_3[j]; } return ret; } - /// Encodes a single character as two hex digits in string form. std::string Hex::chr(char dec){ char dig1 = (dec & 0xF0) >> 4; @@ -103,50 +81,52 @@ namespace Encodings { } /// Decodes a hex-encoded std::string to a raw binary std::string. - std::string Hex::decode(const std::string & in){ - std::string ret(in.size()/2, '\000'); + std::string Hex::decode(const std::string &in){ + std::string ret(in.size() / 2, '\000'); for (size_t i = 0; i < in.size(); ++i){ char c = in[i]; - ret[i>>1] |= ((c&15) + (((c&64)>>6) | ((c&64)>>3))) << ((~i&1) << 2); + ret[i >> 1] |= ((c & 15) + (((c & 64) >> 6) | ((c & 64) >> 3))) << ((~i & 1) << 2); } return ret; } /// urlencodes std::string data, leaving only the characters A-Za-z0-9~!&()' alone. - std::string URL::encode(const std::string & c){ + std::string URL::encode(const std::string &c){ std::string escaped = ""; int max = c.length(); - for (int i = 0; i < max; i++) { - if (('0' <= c[i] && c[i] <= '9') || ('a' <= c[i] && c[i] <= 'z') || ('A' <= c[i] && c[i] <= 'Z') - || (c[i] == '~' || c[i] == '!' || c[i] == '*' || c[i] == '(' || c[i] == ')' || c[i] == '\'')) { + for (int i = 0; i < max; i++){ + if (('0' <= c[i] && c[i] <= '9') || ('a' <= c[i] && c[i] <= 'z') || + ('A' <= c[i] && c[i] <= 'Z') || + (c[i] == '~' || c[i] == '!' || c[i] == '*' || c[i] == '(' || c[i] == ')' || c[i] == '/' || + c[i] == '\'')){ escaped.append(&c[i], 1); - } else { - escaped.append("%"); - escaped.append(Hex::chr(c[i])); + }else{ + if (c[i] == ' '){ + escaped.append("+"); + }else{ + escaped.append("%"); + escaped.append(Hex::chr(c[i])); + } } } return escaped; } /// urldecodes std::string data, parsing out both %-encoded characters and +-encoded spaces. - std::string URL::decode(const std::string & in){ + std::string URL::decode(const std::string &in){ std::string out; - for (unsigned int i = 0; i < in.length(); ++i) { - if (in[i] == '%') { + for (unsigned int i = 0; i < in.length(); ++i){ + if (in[i] == '%'){ char tmp = 0; ++i; - if (i < in.length()) { - tmp = Hex::ord(in[i]) << 4; - } + if (i < in.length()){tmp = Hex::ord(in[i]) << 4;} ++i; - if (i < in.length()) { - tmp += Hex::ord(in[i]); - } + if (i < in.length()){tmp += Hex::ord(in[i]);} out += tmp; - } else { - if (in[i] == '+') { + }else{ + if (in[i] == '+'){ out += ' '; - } else { + }else{ out += in[i]; } } @@ -154,5 +134,5 @@ namespace Encodings { return out; } -}//Encodings namespace +}// namespace Encodings diff --git a/lib/http_parser.cpp b/lib/http_parser.cpp index 3b4bf7dc..0908b0ea 100644 --- a/lib/http_parser.cpp +++ b/lib/http_parser.cpp @@ -8,7 +8,7 @@ /// Helper function to check if the given c-string is numeric or not static bool is_numeric(const char * str){ - while (str != 0){ + while (str[0] != 0){ if (str[0] < 48 || str[0] > 57){return false;} ++str; } @@ -17,6 +17,7 @@ static bool is_numeric(const char * str){ ///Constructor that does the actual parsing HTTP::URL::URL(const std::string & url){ + IPv6Addr = false; //first detect protocol at the start, if any size_t proto_sep = url.find("://"); if (proto_sep != std::string::npos){ @@ -24,6 +25,9 @@ HTTP::URL::URL(const std::string & url){ proto_sep += 3; }else{ proto_sep = 0; + if (url.substr(0, 2) == "//"){ + proto_sep = 2; + } } //proto_sep now points to the start of the host, guaranteed //continue by finding the path, if any @@ -36,7 +40,7 @@ HTTP::URL::URL(const std::string & url){ } size_t hmark = path.find('#'); if (hmark != std::string::npos){ - frag = path.substr(hmark+1); + frag = Encodings::URL::decode(path.substr(hmark+1)); path.erase(hmark); } size_t qmark = path.find('?'); @@ -45,15 +49,28 @@ HTTP::URL::URL(const std::string & url){ path.erase(qmark); } if (path.size()){ + if (path[0] == '/'){ + path.erase(0, 1); + } size_t dots = path.find("/./"); while (dots != std::string::npos){ + DONTEVEN_MSG("%s (/./ -> /)", path.c_str()); path.erase(dots, 2); dots = path.find("/./"); } + dots = path.find("//"); + while (dots != std::string::npos){ + DONTEVEN_MSG("%s (// -> /)", path.c_str()); + path.erase(dots, 1); + dots = path.find("//"); + } + if (path[0] == '/'){ + path.erase(0, 1); + } dots = path.find("/../"); while (dots != std::string::npos){ size_t prevslash = path.rfind('/', dots-1); - if (prevslash == std::string::npos){ + if (prevslash == std::string::npos || dots == 0){ path.erase(0, dots+4); }else{ path.erase(prevslash+1, dots-prevslash+3); @@ -66,39 +83,59 @@ HTTP::URL::URL(const std::string & url){ if (path.substr(0, 3) == "../"){ path.erase(0, 3); } + path = Encodings::URL::decode(path); } } - //host and port are now definitely between proto_sep and first_slash - //we check for [ at the start because we may have an IPv6 address as host - if (url[proto_sep] == '['){ - //IPv6 address - find matching brace - size_t closing_brace = url.find(']', proto_sep); - //check if it exists at all - if (closing_brace == std::string::npos || closing_brace > first_slash){ - //assume host ends at first slash if there is no closing brace before it - closing_brace = first_slash; + //user, pass, host and port are now definitely between proto_sep and first_slash + std::string uphp = url.substr(proto_sep, first_slash-proto_sep);//user+pass+host+port + //Check if we have a user/pass before the host + size_t at_sign = uphp.find('@'); + if (at_sign != std::string::npos){ + std::string creds = uphp.substr(0, at_sign); + uphp.erase(0, at_sign+1); + size_t colon = creds.find(':'); + if (colon != std::string::npos){ + user = Encodings::URL::decode(creds.substr(0, colon)); + pass = Encodings::URL::decode(creds.substr(colon+1)); + }else{ + user = Encodings::URL::decode(creds); } - host = url.substr(proto_sep+1, closing_brace-(proto_sep+1)); + } + //we check for [ at the start because we may have an IPv6 address as host + if (uphp[0] == '['){ + //IPv6 address - find matching brace + IPv6Addr = true; + size_t closing_brace = uphp.find(']'); + host = uphp.substr(1, closing_brace-1); //continue by finding port, if any - size_t colon = url.rfind(':', first_slash); - if (colon == std::string::npos || colon <= closing_brace){ - //no port. Assume 80 - port = "80"; + size_t colon = uphp.find(':', closing_brace); + if (colon == std::string::npos){ + //no port. Assume default + port = ""; }else{ //we have a port number, read it - port = url.substr(colon+1, first_slash-(colon+1)); + port = uphp.substr(colon+1); + if (!is_numeric(port.c_str())){ + host += ":" + port; + port = ""; + } } }else{ //"normal" host - first find port, if any - size_t colon = url.rfind(':', first_slash); - if (colon == std::string::npos || colon < proto_sep){ + size_t colon = uphp.rfind(':'); + if (colon == std::string::npos){ //no port. Assume default port = ""; - host = url.substr(proto_sep, first_slash-proto_sep); + host = uphp; }else{ //we have a port number, read it - port = url.substr(colon+1, first_slash-(colon+1)); - host = url.substr(proto_sep, colon-proto_sep); + port = uphp.substr(colon+1); + host = uphp.substr(0, colon); + if (!is_numeric(port.c_str())){ + IPv6Addr = true; + host += ":" + port; + port = ""; + } } } //if the host is numeric, assume it is a port, instead @@ -121,25 +158,35 @@ uint32_t HTTP::URL::getPort() const{ ///Returns the default port for the protocol in numeric format uint32_t HTTP::URL::getDefaultPort() const{ + if (protocol == "http"){return 80;} if (protocol == "https"){return 443;} if (protocol == "rtmp"){return 1935;} if (protocol == "dtsc"){return 4200;} - return 80; + if (protocol == "rtsp"){return 554;} + return 0; } ///Returns the full URL in string format std::string HTTP::URL::getUrl() const{ std::string ret; if (protocol.size()){ - ret = protocol + "://" + host; + ret = protocol + "://"; }else{ - ret = "//" + host; + ret = "//"; + } + if (user.size() || pass.size()){ + ret += Encodings::URL::encode(user) + ":" + Encodings::URL::encode(pass) + "@"; + } + if (IPv6Addr){ + ret += "[" + host + "]"; + }else{ + ret += host; } if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} ret += "/"; - if (path.size()){ret += path;} + if (path.size()){ret += Encodings::URL::encode(path);} if (args.size()){ret += "?" + args;} - if (frag.size()){ret += "#" + frag;} + if (frag.size()){ret += "#" + Encodings::URL::encode(frag);} return ret; } @@ -147,13 +194,21 @@ std::string HTTP::URL::getUrl() const{ std::string HTTP::URL::getBareUrl() const{ std::string ret; if (protocol.size()){ - ret = protocol + "://" + host; + ret = protocol + "://"; }else{ - ret = "//" + host; + ret = "//"; + } + if (user.size() || pass.size()){ + ret += Encodings::URL::encode(user) + ":" + Encodings::URL::encode(pass) + "@"; + } + if (IPv6Addr){ + ret += "[" + host + "]"; + }else{ + ret += host; } if (port.size() && getPort() != getDefaultPort()){ret += ":" + port;} ret += "/"; - if (path.size()){ret += path;} + if (path.size()){ret += Encodings::URL::encode(path);} return ret; } diff --git a/lib/http_parser.h b/lib/http_parser.h index fdeb32d1..0c4536d6 100644 --- a/lib/http_parser.h +++ b/lib/http_parser.h @@ -83,7 +83,10 @@ namespace HTTP { std::string path;///