From 8d9c104b8ede383975b85eee96c63f62d8c1a24a Mon Sep 17 00:00:00 2001 From: Andrea Pappacoda Date: Sun, 24 Sep 2023 18:41:57 +0200 Subject: [PATCH] wip --- include/pistache/http.h | 2 + include/pistache/http_header.h | 5 + main.cpp | 187 +++++++++++++++++++++++++++++++++ src/common/http.cc | 10 ++ src/common/http_header.cc | 22 ++++ src/common/mime.cc | 9 +- 6 files changed, 230 insertions(+), 5 deletions(-) create mode 100644 main.cpp diff --git a/include/pistache/http.h b/include/pistache/http.h index cea7d9515..16a746028 100644 --- a/include/pistache/http.h +++ b/include/pistache/http.h @@ -217,6 +217,8 @@ namespace Pistache std::chrono::milliseconds timeout() const; + Header::Encoding getBestAcceptEncoding() const; + private: #ifdef LIBSTDCPP_SMARTPTR_LOCK_FIXME void associatePeer(const std::shared_ptr& peer) diff --git a/include/pistache/http_header.h b/include/pistache/http_header.h index 0f1cf198a..1b841686f 100644 --- a/include/pistache/http_header.h +++ b/include/pistache/http_header.h @@ -371,6 +371,11 @@ namespace Pistache::Http::Header explicit AcceptEncoding(Encoding encoding) : EncodingHeader(encoding) { } + + void parseRaw(const char* str, size_t len) override; + + private: + std::vector encodingsSorted_; }; class ContentEncoding : public EncodingHeader diff --git a/main.cpp b/main.cpp new file mode 100644 index 000000000..7657b2956 --- /dev/null +++ b/main.cpp @@ -0,0 +1,187 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +/* + * This function parses a non-NULL terminated C string and interprets it as + * a float. The str must represent a number following the HTTP definition + * of Quality Values: + * + * qvalue = ( "0" [ "." 0*3DIGIT ] ) + * / ( "1" [ "." 
0*3("0") ] )
+ *
+ * https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.1
+ *
+ * If str_end is not NULL, no character at or past it is read. On success the
+ * weight is in *qvalue and the number of characters consumed in *qvalue_len.
+ */
+static bool str_to_qvalue(const char* str, float* qvalue, std::size_t* qvalue_len,
+                          const char* str_end = nullptr) {
+    // It is useless to read more than 6 chars, as the maximum allowed
+    // number of digits after the dot is 3, so n.nnn is 5.
+    // The 6th character is read to check if the user specified a qvalue
+    // with too many digits.
+    std::size_t limit = 6;
+
+    // The string is not NUL-terminated: never scan past the caller's bound
+    if (str_end != nullptr && static_cast<std::size_t>(str_end - str) < limit) {
+        limit = static_cast<std::size_t>(str_end - str);
+    }
+
+    std::size_t len = 0;
+    for (; len < limit; len++) {
+        // std::isdigit is undefined for negative values, hence unsigned char
+        const unsigned char c = static_cast<unsigned char>(str[len]);
+
+        // The only valid characters are digits and the decimal dot,
+        // anything else signals the end of the string
+        if (c != '.' && !std::isdigit(c)) {
+            break;
+        }
+
+        // index 1 must hold the decimal dot, and no other index may;
+        // 0.15 ok
+        // 1.10 ok
+        // 1.0.1 no
+        // .40 no
+        // 011 no
+        if ((c == '.') != (len == 1)) {
+            return false;
+        }
+    }
+
+    *qvalue_len = len;
+
+    // Guards against numbers like:
+    // empty
+    // 1.
+    // 0.1234
+    if (len < 1 || len == 2 || len > 5) {
+        return false;
+    }
+
+    // The first char can only be 0 or 1
+    if (str[0] != '0' && str[0] != '1') {
+        return false;
+    }
+
+    // Accumulate in thousandths so that the range check below is exact
+    int qint = (str[0] - '0') * 1000;
+    int weight = 100;
+    for (std::size_t i = 2; i < len; i++) {
+        qint += (str[i] - '0') * weight;
+        weight /= 10;
+    }
+
+    // 1.001 and above are outside the grammar
+    if (qint > 1000) {
+        return false;
+    }
+
+    *qvalue = static_cast<float>(qint) / 1000.0F;
+    return true;
+}
+
+/*
+ * Tokens are short textual identifiers that do not include whitespace or delimiters.
+ *
+ * tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
+ *       / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
+ *       / DIGIT / ALPHA
+ */
+static bool is_http_token(const unsigned char c) {
+    return c == '!' || c == '#' || c == '$' || c == '%' || c == '&' || c == '\''
+        || c == '*' || c == '+' || c == '-' || c == '.' || c == '^' || c == '_'
+        || c == '`' || c == '|' || c == '~' || std::isalnum(c);
+}
+
+/*
+ * Delimiters are chosen from the set of US-ASCII visual characters not allowed
+ * in a token.
+ */
+static bool is_http_delimiter(const unsigned char c) {
+    return std::isgraph(c) && !is_http_token(c);
+}
+
+static bool is_http_space(const unsigned char c) {
+    return std::isblank(c);
+}
+
+std::optional<std::unordered_map<std::string, float>> getAcceptEncodings(const size_t len, const char* const str) {
+    std::unordered_map<std::string, float> encodings;
+
+    const char* const str_end = str + len;
+    const char* start = str;
+
+    // Bounded by len, not by a NUL terminator that str may not have
+    while (start != str_end) {
+        // Per RFC 9110, if no "q" parameter is present, the default weight is 1
+        float qvalue = 1;
+        const char* const end = std::find(start, str_end, ',');
+        // Skip optional white space in front of the coding
+        const char* const token_start = std::find_if_not(start, end, is_http_space);
+        const char* const token_end = std::find_if_not(token_start, end, is_http_token);
+
+        // If no semicolon is found, it means that no q-value is present
+        const char* const semicolon = std::find(token_end, end, ';');
+        if (semicolon != end) {
+            // Skip optional white space
+            const char* const ows_end = std::find_if_not(semicolon + std::strlen(";"), end, is_http_space);
+
+            // "q=" is expected after the optional white space. If there
+            // isn't, this is a malformed header. The length is checked
+            // first so that nothing past this list element is read
+            if (end - ows_end < 2 || ows_end[0] != 'q' || ows_end[1] != '=') {
+                return std::nullopt;
+            }
+
+            std::size_t qvalue_len = 0;
+            if (!str_to_qvalue(ows_end + std::strlen("q="), &qvalue, &qvalue_len, end)) {
+                return std::nullopt;
+            }
+        }
+
+        // Empty list elements ("gzip,,br") carry no coding and are skipped
+        if (token_start != token_end) {
+            encodings.emplace(std::string(token_start, token_end), qvalue);
+        }
+
+        // Go to the next list element for the next iteration
+        start = (end == str_end) ? end : end + 1;
+    }
+
+    return encodings;
+}
+
+/*
+ * Accept-Encoding = #( codings [ weight ] )
+ * codings = content-coding / "identity" / "*"
+ *
+ * A construct "#" is defined for defining comma-delimited lists of elements.
+ * Each is separated by a single comma (",") and optional whitespace (OWS).
+ */
+int main(int argc, char* argv[]) {
+    // Checked at run time: assert() is compiled out in NDEBUG builds
+    if (argc != 2) {
+        return 2;
+    }
+    const char* const str = argv[1];
+    const auto encodings = getAcceptEncodings(std::strlen(str), str);
+    if (!encodings.has_value()) {
+        return 1;
+    }
+    for (const auto& [name, weight] : *encodings) {
+        std::printf("encoding: %s, qvalue: %f\n", name.c_str(), weight);
+    }
+}
+
diff --git a/src/common/http.cc b/src/common/http.cc
index 98b7c2ec7..391f35320 100644
--- a/src/common/http.cc
+++ b/src/common/http.cc
@@ -10,6 +10,7 @@
    Http layer implementation
 */
 
+#include "pistache/http_header.h"
 #include
 #include
 #include
@@ -650,6 +651,18 @@ namespace Pistache::Http
 
     std::chrono::milliseconds Request::timeout() const { return timeout_; }
 
+    Header::Encoding Request::getBestAcceptEncoding() const {
+        const auto maybe_header = headers().tryGet<Header::AcceptEncoding>();
+        if (maybe_header == nullptr) {
+            return Header::Encoding::Identity;
+        }
+
+        // TODO: choose the highest-weighted coding the header lists. Until the
+        // header exposes that ordering, fall back to identity so that every
+        // path returns a value (flowing off the end is undefined behaviour).
+        return Header::Encoding::Identity;
+    }
+
     Response::Response(Version version)
         : Message(version)
     { }
diff --git a/src/common/http_header.cc b/src/common/http_header.cc
index 16d5b9fe9..c6cda7c91 100644
--- a/src/common/http_header.cc
+++ b/src/common/http_header.cc
@@ -21,6 +21,7 @@
 #include
 #include
 #include
+#include <algorithm>
 #include
 #include
 #include
@@ -642,6 +643,27 @@ namespace Pistache::Http::Header
         }
     }
 
+    void AcceptEncoding::parseRaw(const char* str, size_t len)
+    {
+        // Plan: read a coding name; a ';' after it introduces a q-value.
+        // Priorities are not stored: codings are inserted already sorted.
+        // After the q-value, if a space follows, repeat for the next coding.
+        //
+        // TODO: nothing is stored in encodingsSorted_ yet; for now this only
+        // walks the space-separated items and stops at a "*" wildcard.
+        const char* const end = str + len;
+        const char* item = str;
+        while (item != end) {
+            if (*item == '*') {
+                break;
+            }
+            // Every item ends at the next space, or at the end of the input
+            const char* const item_end = std::find(item, end, ' ');
+            // Step over the separator so that the loop always makes progress
+            item = (item_end == end) ? end : item_end + 1;
+        }
+    }
+
     void EncodingHeader::write(std::ostream& os) const
     {
         os << encodingString(encoding_);
diff --git a/src/common/mime.cc b/src/common/mime.cc
index 5b8f50137..e8758bdfd 100644
--- a/src/common/mime.cc
+++ b/src/common/mime.cc
@@ -108,12 +108,16 @@ namespace Pistache::Http::Mime
             return "q=1";
 
         char buff[sizeof("q=0.99")] = {};
-        if (val_ % 10 == 0)
-            snprintf(buff, sizeof buff, "q=%.1f", val_ / 100.0);
-        else
-            snprintf(buff, sizeof buff, "q=%.2f", val_ / 100.0);
+        const char* const format = val_ % 10 == 0 ? "q=%.1f" : "q=%.2f";
+
+        const int len = snprintf(buff, sizeof(buff), format, val_ / 100.0);
+
+        // snprintf returns a negative value on error and the untruncated
+        // length on overflow; neither is a valid size for buff
+        if (len < 0 || static_cast<size_t>(len) >= sizeof(buff))
+            return std::string(buff);
 
-        return std::string(buff);
+        return std::string(buff, static_cast<size_t>(len));
     }
 
     MediaType MediaType::fromString(const std::string& str)