mirror of https://github.com/ByConity/ByConity
Optimized marks selection algorithm for continuous marks ranges
This commit is contained in:
parent
7aa3f86ab9
commit
7465e00163
|
@ -1498,79 +1498,55 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange(
|
|||
}
|
||||
else
|
||||
{
|
||||
// Do inclusion search, where we only look for one range
|
||||
// For the case of one continuous range of keys we use binary search algorithm
|
||||
|
||||
LOG_TRACE(log, "Running binary search on index range for part {} ({} marks)", part->name, marks_count);
|
||||
|
||||
size_t steps = 0;
|
||||
|
||||
auto find_leaf = [&](bool left) -> std::optional<size_t>
|
||||
MarkRange result_range;
|
||||
|
||||
size_t searched_left = 0;
|
||||
size_t searched_right = marks_count;
|
||||
|
||||
while (searched_left + 1 < searched_right)
|
||||
{
|
||||
std::vector<MarkRange> stack = {};
|
||||
|
||||
MarkRange range = {0, marks_count};
|
||||
|
||||
steps++;
|
||||
|
||||
const size_t middle = (searched_left + searched_right) / 2;
|
||||
MarkRange range(0, middle);
|
||||
if (may_be_true_in_range(range))
|
||||
stack.emplace_back(range.begin, range.end);
|
||||
searched_right = middle;
|
||||
else
|
||||
searched_left = middle;
|
||||
++steps;
|
||||
}
|
||||
result_range.begin = searched_left;
|
||||
LOG_TRACE(log, "Found (LEFT) boundary mark: {}", searched_left);
|
||||
|
||||
while (!stack.empty())
|
||||
{
|
||||
range = stack.back();
|
||||
stack.pop_back();
|
||||
searched_right = marks_count;
|
||||
while (searched_left + 1 < searched_right)
|
||||
{
|
||||
const size_t middle = (searched_left + searched_right) / 2;
|
||||
MarkRange range(middle, marks_count);
|
||||
if (may_be_true_in_range(range))
|
||||
searched_left = middle;
|
||||
else
|
||||
searched_right = middle;
|
||||
++steps;
|
||||
}
|
||||
result_range.end = searched_right;
|
||||
LOG_TRACE(log, "Found (RIGHT) boundary mark: {}", searched_right);
|
||||
|
||||
if (range.end == range.begin + 1)
|
||||
{
|
||||
if (left)
|
||||
return range.begin;
|
||||
else
|
||||
return range.end;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::vector<MarkRange> check_order = {};
|
||||
|
||||
MarkRange left_range = {range.begin, (range.begin + range.end) / 2};
|
||||
MarkRange right_range = {(range.begin + range.end) / 2, range.end};
|
||||
if (may_be_true_in_range(result_range))
|
||||
res.emplace_back(std::move(result_range));
|
||||
|
||||
if (left)
|
||||
{
|
||||
check_order.emplace_back(left_range.begin, left_range.end);
|
||||
check_order.emplace_back(right_range.begin, right_range.end);
|
||||
}
|
||||
else
|
||||
{
|
||||
check_order.emplace_back(right_range.begin, right_range.end);
|
||||
check_order.emplace_back(left_range.begin, left_range.end);
|
||||
}
|
||||
|
||||
steps++;
|
||||
|
||||
if (may_be_true_in_range(check_order[0]))
|
||||
{
|
||||
stack.emplace_back(check_order[0].begin, check_order[0].end);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (may_be_true_in_range(check_order[1]))
|
||||
stack.emplace_back(check_order[1].begin, check_order[1].end);
|
||||
else
|
||||
break; // No mark range would suffice
|
||||
}
|
||||
}
|
||||
|
||||
return std::nullopt;
|
||||
};
|
||||
|
||||
auto left_leaf = find_leaf(true);
|
||||
if (left_leaf)
|
||||
res.emplace_back(left_leaf.value(), find_leaf(false).value());
|
||||
|
||||
LOG_TRACE(log, "Used optimized inclusion search over index for part {} with {} steps", part->name, steps);
|
||||
LOG_TRACE(log, "Found {} range in {} steps", res.empty() ? "empty" : "continuous", steps);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
MarkRanges MergeTreeDataSelectExecutor::filterMarksUsingIndex(
|
||||
MergeTreeIndexPtr index_helper,
|
||||
MergeTreeIndexConditionPtr condition,
|
||||
|
|
Loading…
Reference in New Issue