Skip to content

Commit

Permalink
fast case conversion and comparison
Browse files Browse the repository at this point in the history
  • Loading branch information
lihuiba committed Dec 18, 2024
1 parent 2483980 commit 040d8a9
Show file tree
Hide file tree
Showing 4 changed files with 123 additions and 9 deletions.
46 changes: 45 additions & 1 deletion common/estring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,48 @@ static_assert(
// Compile-time check: applying += with a std::string_view to an estring
// must yield an estring& (so the operator can be chained).
static_assert(
std::is_same<estring&, decltype(std::declval<estring>() += std::declval<std::string_view>())>::value,
"estring += std::string_view should return estring"
);
);

namespace photon {

// Convert `len` bytes of `in` to lower case (ASCII letters only) into `out`,
// then write a terminating '\0' at out[len].
//   out : destination, must have room for at least len + 1 bytes
//   in  : source bytes; may be the same pointer as `out` (in-place conversion)
//   len : number of bytes to convert
void tolower_fast(char* out, const char* in, size_t len) {
    size_t i = 0;
    // Bulk path: 8 bytes per step. The word is moved through a local with a
    // memcpy-style load/store instead of dereferencing a casted uint64_t*,
    // because `in`/`out` have no alignment guarantee and such a dereference
    // also violates strict aliasing. __builtin_memcpy (GCC/Clang) is used so
    // that no additional #include is required by this function.
    for (; i + 8 <= len; i += 8) {
        uint64_t word;
        __builtin_memcpy(&word, in + i, sizeof(word));
        word = tolower_fast8(word);
        __builtin_memcpy(out + i, &word, sizeof(word));
    }
    // Tail: the remaining (len % 8) bytes, one at a time.
    for (; i < len; ++i)
        out[i] = tolower_fast(in[i]);
    out[len] = '\0';
}

// Convert `len` bytes of `in` to upper case (ASCII letters only) into `out`,
// then write a terminating '\0' at out[len].
//   out : destination, must have room for at least len + 1 bytes
//   in  : source bytes; may be the same pointer as `out` (in-place conversion)
//   len : number of bytes to convert
void toupper_fast(char* out, const char* in, size_t len) {
    size_t i = 0;
    // Bulk path: 8 bytes per step. The word is moved through a local with a
    // memcpy-style load/store instead of dereferencing a casted uint64_t*,
    // because `in`/`out` have no alignment guarantee and such a dereference
    // also violates strict aliasing. __builtin_memcpy (GCC/Clang) is used so
    // that no additional #include is required by this function.
    for (; i + 8 <= len; i += 8) {
        uint64_t word;
        __builtin_memcpy(&word, in + i, sizeof(word));
        word = toupper_fast8(word);
        __builtin_memcpy(out + i, &word, sizeof(word));
    }
    // Tail: the remaining (len % 8) bytes, one at a time.
    for (; i < len; ++i)
        out[i] = toupper_fast(in[i]);
    out[len] = '\0';
}


// Three-way comparison of two strings ignoring the case of ASCII letters.
//
// Returns 0 when both strings have the same length and are equal after case
// folding; otherwise a negative / positive value when `a` orders before /
// after `b`. Only the sign of a non-zero result is meaningful.
//
// Ordering rules:
//   * the first differing byte decides, compared as unsigned char after
//     lower-casing (independent of the signedness of plain `char` and of the
//     host byte order);
//   * if one string is a case-insensitive prefix of the other, the shorter
//     one orders first.
int stricmp_fast(std::string_view a, std::string_view b) {
    const size_t common = std::min(a.size(), b.size());
    size_t i = 0;

    // Fast path: skip 8-byte words that are equal after case folding.
    // Words are loaded with a memcpy-style copy because string data carries no
    // 8-byte alignment guarantee (__builtin_memcpy needs no extra #include).
    for (; i + 8 <= common; i += 8) {
        uint64_t wa, wb;
        __builtin_memcpy(&wa, a.data() + i, sizeof(wa));
        __builtin_memcpy(&wb, b.data() + i, sizeof(wb));
        if (tolower_fast8(wa) != tolower_fast8(wb))
            break;  // the deciding byte is inside this word; locate it below
    }

    // Byte-wise path: handles both the mismatching word (if any) and the
    // tail shorter than 8 bytes. Walking the bytes in string order keeps the
    // result independent of endianness.
    for (; i < common; ++i) {
        const int ca = static_cast<unsigned char>(tolower_fast(a[i]));
        const int cb = static_cast<unsigned char>(tolower_fast(b[i]));
        if (ca != cb)
            return ca - cb;
    }

    // Common prefix is equal: order by length without truncating a size_t
    // difference into int.
    return (a.size() > b.size()) - (a.size() < b.size());
}

}
39 changes: 39 additions & 0 deletions common/estring.h
Original file line number Diff line number Diff line change
Expand Up @@ -658,3 +658,42 @@ struct hash<estring> {
};
} // namespace std

namespace photon {

// Lower-case a single character. Only 'A'..'Z' are changed; every other
// value (digits, punctuation, bytes outside ASCII) is returned as is.
inline char tolower_fast(char c) {
    const bool is_upper = (c >= 'A') && (c <= 'Z');
    return is_upper ? static_cast<char>(c + ('a' - 'A')) : c;
}

// Upper-case a single character. Only 'a'..'z' are changed; every other
// value (digits, punctuation, bytes outside ASCII) is returned as is.
inline char toupper_fast(char c) {
    const bool is_lower = (c >= 'a') && (c <= 'z');
    return is_lower ? static_cast<char>(c - ('a' - 'A')) : c;
}

// Lower-case 8 characters packed into one 64-bit word, all lanes at once.
// Each byte of `x` is treated as an independent character; bytes in
// 'A'..'Z' get bit 0x20 set, all other bytes (including those with the top
// bit set) are left unchanged.
inline uint64_t tolower_fast8(uint64_t x) {
    constexpr uint64_t kLanes   = 0x0101010101010101;  // 0x01 in every byte
    constexpr uint64_t kLow7    = 0x7f * kLanes;       // low 7 bits per byte
    constexpr uint64_t kTopBits = 0x80 * kLanes;       // top bit per byte

    // Work on the low 7 bits so the per-byte additions below cannot carry
    // into the neighbouring byte.
    const uint64_t low7 = x & kLow7;
    // Top bit of a lane becomes 1 iff that lane's value is >= 'A'.
    const uint64_t ge_A = low7 + (0x80 - 'A') * kLanes;
    // Top bit of a lane becomes 1 iff that lane's value is > 'Z'.
    const uint64_t gt_Z = low7 + (0x7f - 'Z') * kLanes;
    // Lanes whose original top bit is clear, i.e. plain 7-bit characters.
    const uint64_t ascii = ~x & kTopBits;
    // 0x80 in every lane that holds an upper-case letter.
    const uint64_t upper = (ge_A ^ gt_Z) & ascii;
    // 0x80 >> 2 == 0x20, the bit that distinguishes lower from upper case.
    return x | (upper >> 2);
}

// Upper-case 8 characters packed into one 64-bit word, all lanes at once.
// Each byte of `x` is treated as an independent character; bytes in
// 'a'..'z' get bit 0x20 cleared, all other bytes (including those with the
// top bit set) are left unchanged.
inline uint64_t toupper_fast8(uint64_t x) {
    constexpr uint64_t kLanes   = 0x0101010101010101;  // 0x01 in every byte
    constexpr uint64_t kLow7    = 0x7f * kLanes;       // low 7 bits per byte
    constexpr uint64_t kTopBits = 0x80 * kLanes;       // top bit per byte

    // Work on the low 7 bits so the per-byte additions below cannot carry
    // into the neighbouring byte.
    const uint64_t low7 = x & kLow7;
    // Top bit of a lane becomes 1 iff that lane's value is >= 'a'.
    const uint64_t ge_a = low7 + (0x80 - 'a') * kLanes;
    // Top bit of a lane becomes 1 iff that lane's value is > 'z'.
    const uint64_t gt_z = low7 + (0x7f - 'z') * kLanes;
    // Lanes whose original top bit is clear, i.e. plain 7-bit characters.
    const uint64_t ascii = ~x & kTopBits;
    // 0x80 in every lane that holds a lower-case letter.
    const uint64_t lower = (ge_a ^ gt_z) & ascii;
    // 0x80 >> 2 == 0x20; that bit is set in every lower-case letter, so the
    // XOR clears it.
    return x ^ (lower >> 2);
}

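// Convert a string to lower or upper case (ASCII letters only).
// `out` must have room for at least len + 1 bytes, because a terminating
// '\0' is written at out[len]. In-place conversion (out == in) is allowed.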
void tolower_fast(char* out, const char* in, size_t len);
void toupper_fast(char* out, const char* in, size_t len);

// Case-insensitive comparison of 2 strings; returns 0 when they are equal
// ignoring case, and a non-zero value otherwise.
int stricmp_fast(std::string_view a, std::string_view b);

} // namespace photon
14 changes: 6 additions & 8 deletions common/string-keyed.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.

#pragma once

#include <photon/common/estring.h>
#include <photon/common/string_view.h>
#include <photon/common/hash_combine.h>
#include <cstring>
Expand Down Expand Up @@ -159,20 +160,18 @@ using unordered_map_string_key = basic_map_string_key<

class Hasher_CaseInsensitive {
const size_t BUF_CAP = 64;
size_t partial_hash(std::string_view sv, size_t i, size_t n) const {
size_t partial_hash(std::string_view sv) const {
char buf[BUF_CAP];
sv = sv.substr(i, n);
assert(sv.size() <= BUF_CAP);
for (size_t j = 0; j < sv.size(); ++j)
buf[j] = (char)tolower(sv[j]);
return std::hash<std::string_view>()({buf, n});
photon::tolower_fast(buf, sv.data(), sv.size());
return std::hash<std::string_view>()({buf, sv.size()});
}
public:
size_t operator()(std::string_view sv) const {
size_t h = 0;
for (size_t i = 0; i < sv.size(); i += BUF_CAP) {
auto len = std::min(BUF_CAP, sv.size() - i);
auto ph = partial_hash(sv, i, len);
auto ph = partial_hash(sv.substr(i, len));
h = photon::hash_combine(h, ph);
}
return h;
Expand All @@ -182,8 +181,7 @@ class Hasher_CaseInsensitive {
// Equality predicate that ignores case, for use as the key-equal functor of
// case-insensitive string-keyed containers.
class Equal_CaseInsensitive {
public:
    // True when `a` and `b` have the same length and compare equal with
    // photon::stricmp_fast(). The length check comes first so strings of
    // different sizes are rejected without scanning them.
    bool operator()(std::string_view a, std::string_view b) const {
        return a.size() == b.size() && photon::stricmp_fast(a, b) == 0;
    }
};

Expand Down
33 changes: 33 additions & 0 deletions common/test/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ limitations under the License.
#include <vector>
#include <memory>
#include <string>
#include <string.h>
//#include <gmock/gmock.h>
//#include <malloc.h>
#ifndef __clang__
Expand Down Expand Up @@ -1273,6 +1274,38 @@ TEST(PooledAllocator, allocFailed) {
EXPECT_EQ(nullptr, p2);
}

// Functional checks for the fast ASCII case-conversion helpers and the
// case-insensitive comparison.
TEST(tolowerupper, basic) {
    // Single-character overloads: letters are converted, others untouched.
    EXPECT_EQ(tolower_fast('A'), 'a');
    EXPECT_EQ(tolower_fast('3'), '3');
    EXPECT_EQ(tolower_fast('Z'), 'z');
    EXPECT_EQ(toupper_fast('a'), 'A');
    EXPECT_EQ(toupper_fast('3'), '3');
    EXPECT_EQ(toupper_fast('z'), 'Z');

    // s1 and s2 differ only in letter case.
    const static char s1[]="abC1dEf2%^&", s2[]="ABc1DeF2%^&";

    // 8-bytes-at-a-time overloads. The words are loaded with memcpy rather
    // than by casting the char arrays to uint64_t*, since a char array has
    // no 8-byte alignment guarantee.
    uint64_t w1, w2;
    memcpy(&w1, s1, sizeof(w1));
    memcpy(&w2, s2, sizeof(w2));
    EXPECT_EQ(toupper_fast8(w1), toupper_fast8(w2));
    EXPECT_EQ(tolower_fast8(w1), tolower_fast8(w2));

    // Buffer overloads: 11 characters exercise both the 8-byte path and the
    // byte-wise tail; the result must be NUL-terminated.
    char buf[sizeof(s1)];
    tolower_fast(buf, s1, sizeof(s1) - 1);
    EXPECT_STREQ(buf, "abc1def2%^&");
    toupper_fast(buf, s1, sizeof(s1) - 1);
    EXPECT_STREQ(buf, "ABC1DEF2%^&");

    EXPECT_EQ(stricmp_fast(string_view(s1), string_view(s2)), 0);
}

// Inputs for the two perf tests that follow: the 26-letter alphabet in upper
// case (S1) and in lower case (S2), i.e. strings equal when case is ignored.
const static char S1[]="ABCDEFGHIJKLMNOPQRSTUVWXYZ",
S2[]="abcdefghijklmnopqrstuvwxyz";
// Timing loop: one million strncasecmp() calls on S1/S2. There are no
// assertions; the test exists for its reported run time.
TEST(tolowerupper, perf_strncasecmp) {
    int remaining = 1000000;
    while (remaining) {
        // LEN is a project macro (presumably the array length); "- 1"
        // leaves out the terminating NUL.
        auto ret = strncasecmp(S1, S2, LEN(S1) - 1);
        // Empty asm statement taking `ret` as an input operand, presumably
        // so the compiler cannot discard the call as unused.
        asm volatile(""::"r"(ret));
        --remaining;
    }
}

// Timing loop: one million stricmp_fast() calls on S1/S2. There are no
// assertions; the test exists for its reported run time.
TEST(tolowerupper, perf_photon_stricmp) {
    int remaining = 1000000;
    while (remaining) {
        auto ret = stricmp_fast(S1, S2);
        // Empty asm statement taking `ret` as an input operand, presumably
        // so the compiler cannot discard the call as unused.
        asm volatile(""::"r"(ret));
        --remaining;
    }
}

TEST(update_now, after_idle_sleep) {
thread_yield(); // update now
auto before = photon::now;
Expand Down

0 comments on commit 040d8a9

Please sign in to comment.