-
Notifications
You must be signed in to change notification settings - Fork 703
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
6 changed files
with
230 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
#include <algorithm> | ||
#include <cassert> | ||
#include <cctype> | ||
#include <cstdio> | ||
#include <optional> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <vector> | ||
#include <cstddef> | ||
#include <regex> | ||
#include <cstring> | ||
|
||
#include <strings.h> | ||
|
||
/*
 * Parses an HTTP quality value (qvalue) found at the beginning of `str`.
 *
 * The accepted grammar is the one defined for HTTP Quality Values:
 *
 *   qvalue = ( "0" [ "." 0*3DIGIT ] )
 *          / ( "1" [ "." 0*3("0") ] )
 *
 * https://datatracker.ietf.org/doc/html/rfc7231#section-5.3.1
 *
 * Parameters:
 *   str        - input characters. No length is passed, so parsing stops at
 *                the first character that is neither a digit nor a dot; the
 *                caller must guarantee that such a character (for example
 *                the terminating NUL or a ',') is present within the first
 *                six characters that are readable, otherwise the read runs
 *                past the buffer.
 *   qvalue     - receives the parsed value in the range [0, 1]. It is only
 *                written when the function returns true.
 *   qvalue_len - receives the number of characters that form the qvalue.
 *                It is only meaningful when the function returns true.
 *
 * Returns true when `str` starts with a well-formed qvalue, false otherwise.
 * Characters following the qvalue are not inspected; validating them is the
 * caller's responsibility.
 */
static bool str_to_qvalue(const char* str, float* qvalue, std::size_t* qvalue_len)
{
    // The longest valid qvalue is "n.nnn".
    constexpr std::size_t max_len = 5;
    // The decimal dot is mandatory at this index as soon as more than one
    // character is present, and forbidden everywhere else.
    constexpr std::size_t dot_index = 1;

    // std::isdigit has undefined behaviour for negative values, which a
    // plain char can hold for bytes >= 0x80.
    const auto is_digit = [](const char c) {
        return std::isdigit(static_cast<unsigned char>(c)) != 0;
    };

    *qvalue_len = 0;

    // One character more than max_len is inspected in order to detect
    // values with too many digits (e.g. "0.1234").
    for (; *qvalue_len <= max_len; ++(*qvalue_len))
    {
        const char c = str[*qvalue_len];
        const bool at_dot_index = (*qvalue_len == dot_index);

        if (c == '.')
        {
            // 0.15  ok
            // 1.0.1 no
            // .40   no
            if (!at_dot_index)
            {
                return false;
            }
            continue;
        }

        // Anything that is neither a digit nor the dot ends the qvalue.
        if (!is_digit(c))
        {
            break;
        }

        // A digit where only the dot is allowed: "01", "012", "0123".
        if (at_dot_index)
        {
            return false;
        }
    }

    // Guards against:
    //   empty
    //   "1."      (dot without being followed by anything is still accepted
    //              by the grammar only as part of a longer value; a length
    //              of exactly two is rejected as in the original parser)
    //   "0.1234"  (too many digits)
    if (*qvalue_len < 1 || *qvalue_len == 2 || *qvalue_len > max_len)
    {
        return false;
    }

    // The integer part can only be 0 or 1.
    if (str[0] != '0' && str[0] != '1')
    {
        return false;
    }

    // Accumulate the value in thousandths so the range check is exact.
    int thousandths = (str[0] - '0') * 1000;
    int weight = 100;
    for (std::size_t i = dot_index + 1; i < *qvalue_len; ++i)
    {
        thousandths += (str[i] - '0') * weight;
        weight /= 10;
    }

    // "1.001" and above are not valid quality values.
    if (thousandths > 1000)
    {
        return false;
    }

    *qvalue = static_cast<float>(thousandths) / 1000.0F;
    return true;
}
|
||
/*
 * Tells whether `c` may appear in an HTTP token. Tokens are short textual
 * identifiers that contain neither whitespace nor delimiters.
 *
 *   tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*"
 *         / "+" / "-" / "." / "^" / "_" / "`" / "|" / "~"
 *         / DIGIT / ALPHA
 *
 * Returns true for the punctuation listed above and for every character
 * that std::isalnum classifies as alphanumeric.
 */
static bool is_http_token(const unsigned char c) {
    switch (c) {
    case '!':
    case '#':
    case '$':
    case '%':
    case '&':
    case '\'':
    case '*':
    case '+':
    case '-':
    case '.':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
        return true;
    default:
        // DIGIT / ALPHA
        return std::isalnum(c) != 0;
    }
}
|
||
/* | ||
* Delimiters are chosen from the set of US-ASCII visual characters not allowed | ||
* in a token. | ||
*/ | ||
static bool is_http_delimiter(const unsigned char c) { | ||
return std::isgraph(c) && !is_http_token(c); | ||
} | ||
|
||
// Tells whether `c` counts as HTTP optional whitespace, i.e. whether
// std::isblank classifies it as a blank character.
static bool is_http_space(const unsigned char c) {
    const int blank = std::isblank(c);
    return blank != 0;
}
|
||
std::optional<std::unordered_map<std::string, float>> getAcceptEncodings(const size_t len, const char* const str) { | ||
std::unordered_map<std::string, float> encodings; | ||
|
||
const char* const str_end = str + len; | ||
const char* start = str; | ||
|
||
while (*start != '\0') { | ||
// Per RFC 9110, if no "q" parameter is present, the default weight is 1 | ||
float qvalue = 1; | ||
|
||
const char* const end = std::find(start, str_end, ','); | ||
|
||
const char* const token_end = std::find_if_not(start, end, is_http_token); | ||
|
||
// If no semicolon is found, it means that no q-value is present | ||
const char* const semicolon = std::find(token_end, end, ';'); | ||
if (semicolon != end) { | ||
// Skip optional white space | ||
const char* ows_end = std::find_if_not(semicolon + std::strlen(";"), end, is_http_space); | ||
|
||
if (ows_end[0] != 'q' || ows_end[1] != '=') { | ||
// "q=" is expected after the optional white space. If there | ||
// isn't, this is a malformed header | ||
return std::nullopt; | ||
} | ||
|
||
const char* const value_str = ows_end + std::strlen("q="); | ||
|
||
std::size_t qvalue_len; | ||
const bool valid = str_to_qvalue(value_str, &qvalue, &qvalue_len); | ||
if (!valid) { | ||
return std::nullopt; | ||
} | ||
} | ||
|
||
encodings.emplace(std::string(start, token_end), qvalue); | ||
|
||
// Go to the next token for the next iteration | ||
start = std::find_if(end, str_end, is_http_token); | ||
} | ||
|
||
return std::optional(encodings); | ||
} | ||
|
||
/* | ||
* Accept-Encoding = #( codings [ weight ] ) | ||
* codings = content-coding / "identity" / "*" | ||
* | ||
* A construct "#" is defined for defining comma-delimited lists of elements. | ||
* Each is separated by a single comma (",") and optional whitespace (OWS). | ||
*/ | ||
int main([[maybe_unused]] int argc, char* argv[]) { | ||
assert(argc == 2); | ||
|
||
const char* const str = argv[1]; | ||
|
||
std::optional encodings = getAcceptEncodings(std::strlen(str), str); | ||
if (!encodings.has_value()) { | ||
return 1; | ||
} | ||
|
||
for (const auto& encoding : encodings.value()) { | ||
std::printf("encoding: %s, qvalue: %f\n", encoding.first.c_str(), encoding.second); | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters