Skip to content

Commit

Permalink
support load json index after loadsegment
Browse files Browse the repository at this point in the history
Signed-off-by: Xianhui.Lin <[email protected]>

improve statschecker unittest

Signed-off-by: Xianhui.Lin <[email protected]>

jsonindex expr code format

Signed-off-by: Xianhui.Lin <[email protected]>

fix go format

Signed-off-by: Xianhui.Lin <[email protected]>

fix controllerbasetest fail

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonindex memory leak

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonkey go format

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonindex go codeformat

Signed-off-by: Xianhui.Lin <[email protected]>

improve jsoninvert unittest

Signed-off-by: Xianhui.Lin <[email protected]>

delete unused code

Signed-off-by: Xianhui.Lin <[email protected]>

refine test_json_key_index

Signed-off-by: Xianhui.Lin <[email protected]>

fix cpp unittest

Signed-off-by: Xianhui.Lin <[email protected]>

delete loginfo

Signed-off-by: Xianhui.Lin <[email protected]>

fix compile error

Signed-off-by: Xianhui.Lin <[email protected]>

fix codeformat

Signed-off-by: Xianhui.Lin <[email protected]>

fix createindex again hang up

Signed-off-by: Xianhui.Lin <[email protected]>

fix unittest

Signed-off-by: Xianhui.Lin <[email protected]>

fix createindex hang

Signed-off-by: Xianhui.Lin <[email protected]>

fix triggerstatstask go ut

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonindex filter error

Signed-off-by: Xianhui.Lin <[email protected]>

fix jsonindex filter error

Signed-off-by: Xianhui.Lin <[email protected]>

fix format

Signed-off-by: Xianhui.Lin <[email protected]>

improve jsonkey unittest

Signed-off-by: Xianhui.Lin <[email protected]>

remove sealsegment chunknum assert

Signed-off-by: Xianhui.Lin <[email protected]>
  • Loading branch information
JsDove committed Jan 6, 2025
1 parent 294902a commit 223a346
Show file tree
Hide file tree
Showing 41 changed files with 1,793 additions and 1,488 deletions.
83 changes: 1 addition & 82 deletions internal/core/src/common/Json.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,45 +71,6 @@ ExtractSubJson(const std::string& json, const std::vector<std::string>& keys) {
return buffer.GetString();
}

// Splits a JSON pointer into its first reference token and the rest of the
// pointer.
//
// json_pointer: either empty, or a pointer that starts with '/'
//               (asserted below).
// escaped:      when true, the returned top-level key has its JSON-pointer
//               escape sequences decoded ("~1" -> "/", "~0" -> "~").
//
// Returns {top_key, remaining_path}. remaining_path keeps its leading '/'
// and is empty when the pointer consists of a single token. An empty
// pointer yields {"", ""}.
inline std::pair<std::string, std::string>
ParseTopLevelKey(const std::string& json_pointer, bool escaped = false) {
    if (json_pointer.empty()) {
        return {"", ""};
    }

    Assert(json_pointer[0] == '/');
    const size_t start = 1;
    const size_t end = json_pointer.find('/', start);

    std::string top_key = (end == std::string::npos)
                              ? json_pointer.substr(start)
                              : json_pointer.substr(start, end - start);

    if (escaped) {
        // Decode every escape sequence in one left-to-right pass. A single
        // pass never re-scans characters it has already produced, so "~01"
        // decodes to "~1" (not "/"), and keys with several escapes are
        // decoded completely rather than only at the first occurrence.
        std::string decoded;
        decoded.reserve(top_key.size());
        for (size_t i = 0; i < top_key.size(); ++i) {
            const char current = top_key[i];
            if (current == '~' && i + 1 < top_key.size()) {
                const char next = top_key[i + 1];
                if (next == '1') {
                    decoded.push_back('/');
                    ++i;
                    continue;
                }
                if (next == '0') {
                    decoded.push_back('~');
                    ++i;
                    continue;
                }
            }
            // Not part of a recognised escape: keep the character as-is.
            decoded.push_back(current);
        }
        top_key.swap(decoded);
    }

    std::string remaining_path =
        (end == std::string::npos) ? "" : json_pointer.substr(end);

    return {top_key, remaining_path};
}

// Returns a copy of `str` in which every character has been passed through
// std::tolower (each character is widened via unsigned char first).
static std::string
ToLower(const std::string_view& str) {
    std::string lowered(str);
    for (auto it = lowered.begin(); it != lowered.end(); ++it) {
        const unsigned char ch = *it;
        *it = std::tolower(ch);
    }
    return lowered;
}

using document = simdjson::ondemand::document;
template <typename T>
using value_result = simdjson::simdjson_result<T>;
Expand Down Expand Up @@ -230,8 +191,7 @@ class Json {

// it's always safe to add the padding,
// as we have allocated the memory with this padding
auto doc = parser.parse(data_.data() + offset,
length + simdjson::SIMDJSON_PADDING);
auto doc = parser.parse(data_.data() + offset, length);
AssertInfo(doc.error() == simdjson::SUCCESS,
"failed to parse the json {}: {}",
std::string(data_.data() + offset, length),
Expand Down Expand Up @@ -288,47 +248,6 @@ class Json {
return doc(offset, length).get<T>();
}

// Interprets the raw text stored at [offset, offset + length) of `data_` as
// a value of type T, using plain string conversion.
//
// T must be one of bool, int64_t, double or std::string_view (enforced by
// the static_assert in the final branch).
//
// Returns {value, error}: `error` is empty on success; on failure it holds a
// short description and `value` is false / 0.
//   - bool:             case-insensitive "true" / "false".
//   - int64_t / double: the whole slice must be consumed by the conversion.
//   - std::string_view: the slice itself, never an error.
template <typename T>
std::pair<T, std::string>
at_pos(uint16_t offset, uint16_t length) const {
    const char* pos = data_.data() + offset;
    std::string_view str(pos, length);
    if constexpr (std::is_same_v<T, bool>) {
        // Lower-case once and compare against both literals.
        const std::string lowered = milvus::ToLower(str);
        if (lowered == "true") {
            return {true, ""};
        } else if (lowered == "false") {
            return {false, ""};
        } else {
            return {false, "invalid boolean value"};
        }
    } else if constexpr (std::is_same_v<T, int64_t>) {
        try {
            // Convert only the `length` characters of the slice. Passing the
            // bare pointer would make std::stoll read up to the next NUL,
            // i.e. beyond the requested slice.
            const std::string text(str);
            size_t parsed_chars = 0;
            int64_t int_value = std::stoll(text, &parsed_chars, 10);
            if (parsed_chars == length) {
                return {int_value, ""};
            }
            return {0, "string contains non-integer characters"};
        } catch (...) {
            // std::stoll throws on non-numeric input and on overflow.
            return {0, "invalid integer string"};
        }
    } else if constexpr (std::is_same_v<T, double>) {
        try {
            // Same bounding as the integer branch.
            const std::string text(str);
            size_t parsed_chars = 0;
            double double_value = std::stod(text, &parsed_chars);
            if (parsed_chars == length) {
                return {double_value, ""};
            }
            return {0, "string contains non-numeric characters"};
        } catch (...) {
            // std::stod throws on non-numeric input and on out-of-range.
            return {0, "invalid double string"};
        }
    } else {
        static_assert(std::is_same_v<std::string_view, T>);
        return {str, ""};
    }
}

value_result<simdjson::dom::array>
array_at(uint16_t offset, uint16_t length) const {
return dom_doc(offset, length).get_array();
Expand Down
11 changes: 11 additions & 0 deletions internal/core/src/common/jsmn.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License

/*
* MIT License
*
Expand Down
144 changes: 76 additions & 68 deletions internal/core/src/exec/expression/BinaryRangeExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,17 +261,17 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForData(OffsetVector* input) {
TargetBitmapView valid_res(res_vec->GetValidRawData(), real_batch_size);
valid_res.set();

auto execute_sub_batch = [lower_inclusive,
upper_inclusive]<FilterType filter_type =
FilterType::sequential>(
const T* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
HighPrecisionType val1,
HighPrecisionType val2) {
auto execute_sub_batch =
[ lower_inclusive,
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
const T* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
HighPrecisionType val1,
HighPrecisionType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFunc<T, true, true, filter_type> func;
func(val1, val2, data, size, res, offsets);
Expand Down Expand Up @@ -366,18 +366,17 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJson(OffsetVector* input) {
ValueType val2 = GetValueFromProto<ValueType>(expr_->upper_val_);
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

auto execute_sub_batch = [lower_inclusive,
upper_inclusive,
pointer]<FilterType filter_type =
FilterType::sequential>(
const milvus::Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2) {
auto execute_sub_batch =
[ lower_inclusive, upper_inclusive,
pointer ]<FilterType filter_type = FilterType::sequential>(
const milvus::Json* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForJson<ValueType, true, true, filter_type>
func;
Expand Down Expand Up @@ -456,11 +455,23 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJsonForIndex() {
using GetType = std::conditional_t<std::is_same_v<ValueType, std::string>,
std::string_view,
ValueType>;
Assert(segment_->type() == SegmentType::Sealed && num_data_chunk_ == 1);
Assert(segment_->type() == SegmentType::Sealed);
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
? active_count_ - current_data_chunk_pos_

Check warning on line 460 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L458-L460

Added lines #L458 - L460 were not covered by tests
: batch_size_;
auto pointer = milvus::Json::pointer(expr_->column_.nested_path_);

Check warning on line 462 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L462

Added line #L462 was not covered by tests
// Helper macro for the JSON-index filter callback: reads the value at
// (offset, size) from `json` as GetType and returns the result of `cmp`
// from the enclosing function.
//
// Expects `json`, `offset`, `size` and the type alias `GetType` to be in
// scope at the expansion site; `cmp` is an expression that refers to the
// local `val` declared inside the macro.
//
// Behaviour:
//   - value read succeeds          -> return (cmp)
//   - read fails, GetType==int64_t -> retry the read as double (the inner
//                                     `val` shadows the outer one, so `cmp`
//                                     is evaluated against the double
//                                     result) and return
//                                     !val.error() && (cmp)
//   - read fails otherwise         -> return false
//
// Every path executes `return`, so nothing after the expansion runs.
// NOTE(review): the call sites below wrap val.value() in ValueType(...);
// in the double fallback that presumably converts the double back to the
// integral ValueType before comparing - confirm this is intended.
#define BinaryRangeJSONIndexCompare(cmp) \
do { \
auto val = json.at<GetType>(offset, size); \
if (val.error()) { \
if constexpr (std::is_same_v<GetType, int64_t>) { \
auto val = json.at<double>(offset, size); \
return !val.error() && (cmp); \
} \
return false; \
} \
return (cmp); \
} while (false)
bool lower_inclusive = expr_->lower_inclusive_;
bool upper_inclusive = expr_->upper_inclusive_;
ValueType val1 = GetValueFromProto<ValueType>(expr_->lower_val_);
Expand All @@ -471,39 +482,36 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForJsonForIndex() {
auto field_id = expr_->column_.field_id_;
auto* index = sealed_seg->GetJsonKeyIndex(field_id);
Assert(index != nullptr);
auto filter_func =
[sealed_seg,
&field_id,
val1,
val2,
lower_inclusive,
upper_inclusive](uint32_t row_id, uint16_t offset, uint16_t size) {
auto json_pair = sealed_seg->GetJsonData(field_id, row_id);
if (!json_pair.second) {
return false;
}
auto json = milvus::Json(json_pair.first.data(),
json_pair.first.size());
auto val = json.at<GetType>(offset, size);
if (val.error()) {
return false;
}
if (lower_inclusive && upper_inclusive) {
return val1 <= ValueType(val.value()) &&
ValueType(val.value()) <= val2;
} else if (lower_inclusive && !upper_inclusive) {
return val1 <= ValueType(val.value()) &&
ValueType(val.value()) < val2;
} else if (!lower_inclusive && upper_inclusive) {
return val1 < ValueType(val.value()) &&
ValueType(val.value()) <= val2;
} else {
return val1 < ValueType(val.value()) &&
ValueType(val.value()) < val2;
}
};
auto filter_func = [sealed_seg,

Check warning on line 485 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L481-L485

Added lines #L481 - L485 were not covered by tests
&field_id,
val1,
val2,
lower_inclusive,
upper_inclusive](uint32_t row_id,
uint16_t offset,
uint16_t size) {
auto json_pair = sealed_seg->GetJsonData(field_id, row_id);
if (!json_pair.second) {

Check warning on line 494 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L493-L494

Added lines #L493 - L494 were not covered by tests
return false;
}
auto json =

Check warning on line 497 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L497

Added line #L497 was not covered by tests
milvus::Json(json_pair.first.data(), json_pair.first.size());
if (lower_inclusive && upper_inclusive) {
BinaryRangeJSONIndexCompare(val1 <= ValueType(val.value()) &&

Check warning on line 500 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L499-L500

Added lines #L499 - L500 were not covered by tests
ValueType(val.value()) <= val2);
} else if (lower_inclusive && !upper_inclusive) {
BinaryRangeJSONIndexCompare(val1 <= ValueType(val.value()) &&

Check warning on line 503 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L502-L503

Added lines #L502 - L503 were not covered by tests
ValueType(val.value()) < val2);
} else if (!lower_inclusive && upper_inclusive) {
BinaryRangeJSONIndexCompare(val1 < ValueType(val.value()) &&

Check warning on line 506 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L505-L506

Added lines #L505 - L506 were not covered by tests
ValueType(val.value()) <= val2);
} else {
BinaryRangeJSONIndexCompare(val1 < ValueType(val.value()) &&

Check warning on line 509 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L509

Added line #L509 was not covered by tests
ValueType(val.value()) < val2);
}
};
cached_index_chunk_res_ =
index->FilterByPath(pointer, real_batch_size, filter_func).clone();
index->FilterByPath(pointer, active_count_, filter_func).clone();
cached_index_chunk_id_ = 0;

Check warning on line 515 in internal/core/src/exec/expression/BinaryRangeExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/BinaryRangeExpr.cpp#L513-L515

Added lines #L513 - L515 were not covered by tests
}
TargetBitmap result;
Expand Down Expand Up @@ -540,18 +548,18 @@ PhyBinaryRangeFilterExpr::ExecRangeVisitorImplForArray(OffsetVector* input) {
index = std::stoi(expr_->column_.nested_path_[0]);
}

auto execute_sub_batch = [lower_inclusive,
upper_inclusive]<FilterType filter_type =
FilterType::sequential>(
const milvus::ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2,
int index) {
auto execute_sub_batch =
[ lower_inclusive,
upper_inclusive ]<FilterType filter_type = FilterType::sequential>(
const milvus::ArrayView* data,
const bool* valid_data,
const int32_t* offsets,
const int size,
TargetBitmapView res,
TargetBitmapView valid_res,
ValueType val1,
ValueType val2,
int index) {
if (lower_inclusive && upper_inclusive) {
BinaryRangeElementFuncForArray<ValueType, true, true, filter_type>
func;
Expand Down
26 changes: 13 additions & 13 deletions internal/core/src/exec/expression/ExistsExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,18 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {
TargetBitmapView res,
TargetBitmapView valid_res,
const std::string& pointer) {
for (int i = 0; i < size; ++i) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = data[offset].exist(pointer);
for (int i = 0; i < size; ++i) {
auto offset = i;
if constexpr (filter_type == FilterType::random) {
offset = (offsets) ? offsets[i] : i;
}
};
if (valid_data != nullptr && !valid_data[offset]) {
res[i] = valid_res[i] = false;
continue;
}
res[i] = data[offset].exist(pointer);
}
};

int64_t processed_size;
if (has_offset_input_) {
Expand All @@ -102,7 +102,7 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegment(OffsetVector* input) {

VectorPtr
PhyExistsFilterExpr::EvalJsonExistsForDataSegmentForIndex() {
Assert(segment_->type() == SegmentType::Sealed && num_data_chunk_ == 1);
Assert(segment_->type() == SegmentType::Sealed);
auto real_batch_size = current_data_chunk_pos_ + batch_size_ > active_count_
? active_count_ - current_data_chunk_pos_

Check warning on line 107 in internal/core/src/exec/expression/ExistsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/ExistsExpr.cpp#L104-L107

Added lines #L104 - L107 were not covered by tests
: batch_size_;
Expand All @@ -125,7 +125,7 @@ PhyExistsFilterExpr::EvalJsonExistsForDataSegmentForIndex() {
return json.exist(pointer);
};
cached_index_chunk_res_ =
index->FilterByPath(pointer, real_batch_size, filter_func).clone();
index->FilterByPath(pointer, active_count_, filter_func).clone();
cached_index_chunk_id_ = 0;

Check warning on line 129 in internal/core/src/exec/expression/ExistsExpr.cpp

View check run for this annotation

Codecov / codecov/patch

internal/core/src/exec/expression/ExistsExpr.cpp#L125-L129

Added lines #L125 - L129 were not covered by tests
}
TargetBitmap result;
Expand Down
Loading

0 comments on commit 223a346

Please sign in to comment.