forked from mindspore-Ecosystem/mindspore
!2188 Remove B+ tree deadcode and add an additional output to Search function
Merge pull request !2188 from JesseKLee/deadcode
This commit is contained in:
commit
a1f194c971
|
@ -408,8 +408,7 @@ Status ClueOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
auto file_it = filename_index_->Search(*it);
|
||||
file_index.emplace_back(std::pair<std::string, int64_t>(file_it.value(), *it));
|
||||
file_index.emplace_back(std::pair<std::string, int64_t>((*filename_index_)[*it], *it));
|
||||
}
|
||||
} else {
|
||||
for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) {
|
||||
|
|
|
@ -72,8 +72,9 @@ Status FilenameBlock::GetFilename(std::string *out_filename, const AutoIndexObj<
|
|||
RETURN_IF_NOT_OK(IOBlock::GetKey(&fetched_key));
|
||||
|
||||
// Do an index lookup using that key to get the filename.
|
||||
auto it = index.Search(fetched_key);
|
||||
if (it != index.end()) {
|
||||
auto r = index.Search(fetched_key);
|
||||
if (r.second) {
|
||||
auto &it = r.first;
|
||||
*out_filename = it.value();
|
||||
} else {
|
||||
RETURN_STATUS_UNEXPECTED("Could not find filename from index");
|
||||
|
|
|
@ -314,8 +314,7 @@ Status TextFileOp::FillIOBlockQueue(const std::vector<int64_t> &i_keys) {
|
|||
break;
|
||||
}
|
||||
}
|
||||
auto file_it = filename_index_->Search(*it);
|
||||
file_index.emplace_back(std::pair<std::string, int64_t>(file_it.value(), *it));
|
||||
file_index.emplace_back(std::pair<std::string, int64_t>((*filename_index_)[*it], *it));
|
||||
}
|
||||
} else {
|
||||
for (auto it = filename_index_->begin(); it != filename_index_->end(); ++it) {
|
||||
|
|
|
@ -451,8 +451,7 @@ Status TFReaderOp::FillIOBlockShuffle(const std::vector<int64_t> &i_keys) {
|
|||
}
|
||||
} else {
|
||||
// Do an index lookup using that key to get the filename.
|
||||
auto file_it = filename_index_->Search(*it);
|
||||
std::string file_name = file_it.value();
|
||||
std::string file_name = (*filename_index_)[*it];
|
||||
if (NeedPushFileToblockQueue(file_name, &start_offset, &end_offset, pre_count)) {
|
||||
auto ioBlock = std::make_unique<FilenameBlock>(*it, start_offset, end_offset, IOBlock::kDeIoBlockNone);
|
||||
RETURN_IF_NOT_OK(PushIoBlockQueue(queue_index, std::move(ioBlock)));
|
||||
|
|
|
@ -40,8 +40,6 @@ struct BPlusTreeTraits {
|
|||
static constexpr slot_type kLeafSlots = 256;
|
||||
// Number of slots in each inner node of the tree
|
||||
static constexpr slot_type kInnerSlots = 128;
|
||||
// If kAppendMode is true, we will split high instead of 50/50 split
|
||||
static constexpr bool kAppendMode = false;
|
||||
};
|
||||
|
||||
/// Implementation of B+ tree
|
||||
|
@ -123,19 +121,14 @@ class BPlusTree {
|
|||
std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value);
|
||||
std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value);
|
||||
|
||||
void PopulateNumKeys();
|
||||
|
||||
key_type KeyAtPos(uint64_t inx);
|
||||
|
||||
// Statistics
|
||||
struct tree_stats {
|
||||
std::atomic<uint64_t> size_;
|
||||
uint32_t leaves_;
|
||||
uint32_t inner_nodes_;
|
||||
uint32_t level_;
|
||||
bool num_keys_array_valid_;
|
||||
|
||||
tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0), num_keys_array_valid_(false) {}
|
||||
tree_stats() : size_(0), leaves_(0), inner_nodes_(0), level_(0) {}
|
||||
};
|
||||
|
||||
private:
|
||||
|
@ -160,10 +153,6 @@ class BPlusTree {
|
|||
Node<BaseNode> lru_;
|
||||
};
|
||||
|
||||
uint64_t PopulateNumKeys(BaseNode *n);
|
||||
|
||||
key_type KeyAtPos(BaseNode *n, uint64_t inx);
|
||||
|
||||
// This control block keeps track of all the nodes we traverse on insert.
|
||||
// To maximize concurrency, internal nodes are latched S. If a node split
|
||||
// is required, we must release all the latches and redo it again and change
|
||||
|
@ -255,7 +244,6 @@ class BPlusTree {
|
|||
slot_type slot_dir_[traits::kInnerSlots] = {0};
|
||||
key_type keys_[traits::kInnerSlots] = {0};
|
||||
BaseNode *data_[traits::kInnerSlots + 1] = {nullptr};
|
||||
uint64_t num_keys_[traits::kInnerSlots + 1] = {0};
|
||||
slot_type slotuse_;
|
||||
};
|
||||
|
||||
|
@ -391,7 +379,6 @@ class BPlusTree {
|
|||
Iterator operator--(int);
|
||||
|
||||
bool operator==(const Iterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
|
||||
|
||||
bool operator!=(const Iterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
|
||||
|
||||
private:
|
||||
|
@ -441,7 +428,6 @@ class BPlusTree {
|
|||
ConstIterator operator--(int);
|
||||
|
||||
bool operator==(const ConstIterator &x) const { return (x.cur_ == cur_) && (x.slot_ == slot_); }
|
||||
|
||||
bool operator!=(const ConstIterator &x) const { return (x.cur_ != cur_) || (x.slot_ != slot_); }
|
||||
|
||||
private:
|
||||
|
@ -451,20 +437,17 @@ class BPlusTree {
|
|||
};
|
||||
|
||||
Iterator begin();
|
||||
|
||||
Iterator end();
|
||||
|
||||
ConstIterator begin() const;
|
||||
|
||||
ConstIterator end() const;
|
||||
|
||||
ConstIterator cbegin() const;
|
||||
|
||||
ConstIterator cend() const;
|
||||
|
||||
// Locate the entry with key
|
||||
ConstIterator Search(const key_type &key) const;
|
||||
Iterator Search(const key_type &key);
|
||||
std::pair<ConstIterator, bool> Search(const key_type &key) const;
|
||||
std::pair<Iterator, bool> Search(const key_type &key);
|
||||
|
||||
value_type operator[](key_type key);
|
||||
};
|
||||
|
|
|
@ -269,26 +269,17 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::LeafInsertK
|
|||
RETURN_IF_BAD_RC(rc);
|
||||
leaf_nodes_.InsertAfter(node, new_leaf);
|
||||
*split_node = new_leaf;
|
||||
if (slot == node->slotuse_ && traits::kAppendMode) {
|
||||
// Split high. Good for bulk load when keys are inserted in ascending order
|
||||
*split_key = key;
|
||||
// Just insert the new key to the new leaf. No further need to move the keys
|
||||
// from one leaf to the other.
|
||||
rc = new_leaf->InsertIntoSlot(nullptr, 0, key, std::move(value));
|
||||
// 50/50 split
|
||||
rc = node->Split(new_leaf);
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
*split_key = new_leaf->keys_[0];
|
||||
if (LessThan(key, *split_key)) {
|
||||
rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value));
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
} else {
|
||||
// 50/50 split
|
||||
rc = node->Split(new_leaf);
|
||||
slot -= node->slotuse_;
|
||||
rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value));
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
*split_key = new_leaf->keys_[0];
|
||||
if (LessThan(key, *split_key)) {
|
||||
rc = node->InsertIntoSlot(nullptr, slot, key, std::move(value));
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
} else {
|
||||
slot -= node->slotuse_;
|
||||
rc = new_leaf->InsertIntoSlot(nullptr, slot, key, std::move(value));
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
|
@ -309,25 +300,18 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InnerInsert
|
|||
rc = AllocateInner(&new_inner);
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
*split_node = new_inner;
|
||||
if (slot == node->slotuse_ && traits::kAppendMode) {
|
||||
*split_key = key;
|
||||
new_inner->data_[0] = node->data_[node->slotuse_];
|
||||
rc = new_inner->InsertIntoSlot(0, key, ptr);
|
||||
rc = node->Split(new_inner, split_key);
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
if (LessThan(key, *split_key)) {
|
||||
// Need to readjust the slot position since the split key is no longer in the two children.
|
||||
slot = FindSlot(node, key);
|
||||
rc = node->InsertIntoSlot(slot, key, ptr);
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
} else {
|
||||
rc = node->Split(new_inner, split_key);
|
||||
// Same reasoning as above
|
||||
slot = FindSlot(new_inner, key);
|
||||
rc = new_inner->InsertIntoSlot(slot, key, ptr);
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
if (LessThan(key, *split_key)) {
|
||||
// Need to readjust the slot position since the split key is no longer in the two children.
|
||||
slot = FindSlot(node, key);
|
||||
rc = node->InsertIntoSlot(slot, key, ptr);
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
} else {
|
||||
// Same reasoning as above
|
||||
slot = FindSlot(new_inner, key);
|
||||
rc = new_inner->InsertIntoSlot(slot, key, ptr);
|
||||
RETURN_IF_BAD_RC(rc);
|
||||
}
|
||||
}
|
||||
}
|
||||
return rc;
|
||||
|
@ -377,8 +361,7 @@ typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::InsertKeyVa
|
|||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::Locate(RWLock *parent_lock,
|
||||
bool forUpdate,
|
||||
typename BPlusTree<K, V, A, C, T>::IndexRc BPlusTree<K, V, A, C, T>::Locate(RWLock *parent_lock, bool forUpdate,
|
||||
BPlusTree<K, V, A, C, T>::BaseNode *top,
|
||||
const key_type &key,
|
||||
BPlusTree<K, V, A, C, T>::LeafNode **ln,
|
||||
|
@ -481,9 +464,6 @@ Status BPlusTree<K, V, A, C, T>::DoInsert(const key_type &key, std::unique_ptr<v
|
|||
do {
|
||||
// Track all the paths to the target and lock each internal node in S.
|
||||
LockPathCB InsCB(this, retry);
|
||||
// Mark the numKeysArray invalid. We may latch the tree in S and multiple guys are doing insert.
|
||||
// But it is okay as we all set the same value.
|
||||
stats_.num_keys_array_valid_ = false;
|
||||
// Initially we lock path in S unless we need to do node split.
|
||||
retry = false;
|
||||
BaseNode *new_child = nullptr;
|
||||
|
@ -552,70 +532,6 @@ std::unique_ptr<V> BPlusTree<K, V, A, C, T>::DoUpdate(const key_type &key, std::
|
|||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
void BPlusTree<K, V, A, C, T>::PopulateNumKeys() {
|
||||
// Start from the root and we calculate how many leaf nodes as pointed to by each inner node.
|
||||
// The results are stored in the numKeys array in each inner node.
|
||||
(void)PopulateNumKeys(root_);
|
||||
// Indicate the result is accurate since we have the tree locked exclusive.
|
||||
stats_.num_keys_array_valid_ = true;
|
||||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
uint64_t BPlusTree<K, V, A, C, T>::PopulateNumKeys(BPlusTree<K, V, A, C, T>::BaseNode *n) {
|
||||
if (n->is_leafnode()) {
|
||||
auto *leaf = static_cast<LeafNode *>(n);
|
||||
return leaf->slotuse_;
|
||||
} else {
|
||||
auto *inner = static_cast<InnerNode *>(n);
|
||||
uint64_t num_keys = 0;
|
||||
for (auto i = 0; i < inner->slotuse_ + 1; i++) {
|
||||
inner->num_keys_[i] = PopulateNumKeys(inner->data_[i]);
|
||||
num_keys += inner->num_keys_[i];
|
||||
}
|
||||
return num_keys;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
typename BPlusTree<K, V, A, C, T>::key_type BPlusTree<K, V, A, C, T>::KeyAtPos(uint64_t inx) {
|
||||
if (stats_.num_keys_array_valid_ == false) {
|
||||
// We need exclusive access to the tree. If concurrent insert is going on, it is hard to get accurate numbers
|
||||
UniqueLock lck(&rw_lock_);
|
||||
// Check again.
|
||||
if (stats_.num_keys_array_valid_ == false) {
|
||||
PopulateNumKeys();
|
||||
}
|
||||
}
|
||||
// Now we know how many keys each inner branch contains, we can now traverse the correct node in log n time.
|
||||
return KeyAtPos(root_, inx);
|
||||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
typename BPlusTree<K, V, A, C, T>::key_type BPlusTree<K, V, A, C, T>::KeyAtPos(BPlusTree<K, V, A, C, T>::BaseNode *n,
|
||||
uint64_t inx) {
|
||||
if (n->is_leafnode()) {
|
||||
auto *leaf = static_cast<LeafNode *>(n);
|
||||
return leaf->keys_[leaf->slot_dir_[inx]];
|
||||
} else {
|
||||
auto *inner = static_cast<InnerNode *>(n);
|
||||
if ((inx + 1) > inner->num_keys_[0]) {
|
||||
inx -= inner->num_keys_[0];
|
||||
} else {
|
||||
return KeyAtPos(inner->data_[0], inx);
|
||||
}
|
||||
for (auto i = 0; i < inner->slotuse_; i++) {
|
||||
if ((inx + 1) > inner->num_keys_[inner->slot_dir_[i] + 1]) {
|
||||
inx -= inner->num_keys_[inner->slot_dir_[i] + 1];
|
||||
} else {
|
||||
return KeyAtPos(inner->data_[inner->slot_dir_[i] + 1], inx);
|
||||
}
|
||||
}
|
||||
}
|
||||
// If we get here, inx is way too big. Instead of throwing exception, we will just return the default value
|
||||
// of key_type whatever it is.
|
||||
return key_type();
|
||||
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif
|
||||
|
|
|
@ -286,7 +286,8 @@ typename BPlusTree<K, V, A, C, T>::ConstIterator &BPlusTree<K, V, A, C, T>::Cons
|
|||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
typename BPlusTree<K, V, A, C, T>::ConstIterator BPlusTree<K, V, A, C, T>::Search(const key_type &key) const {
|
||||
std::pair<typename BPlusTree<K, V, A, C, T>::ConstIterator, bool> BPlusTree<K, V, A, C, T>::Search(
|
||||
const key_type &key) const {
|
||||
if (root_ != nullptr) {
|
||||
LeafNode *leaf = nullptr;
|
||||
slot_type slot;
|
||||
|
@ -294,21 +295,15 @@ typename BPlusTree<K, V, A, C, T>::ConstIterator BPlusTree<K, V, A, C, T>::Searc
|
|||
// Lock the tree in S, pass the lock to Locate which will unlock it for us underneath.
|
||||
myLock->LockShared();
|
||||
IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot);
|
||||
if (rc == IndexRc::kOk) {
|
||||
// All locks from the tree to the parent of leaf are all gone. We still have a S lock
|
||||
// on the leaf. The unlock will be handled by the iterator when it goes out of scope.
|
||||
return ConstIterator(leaf, slot, true);
|
||||
} else {
|
||||
MS_LOG(DEBUG) << "Key not found. rc = " << static_cast<int>(rc) << ".";
|
||||
return cend();
|
||||
}
|
||||
bool find = (rc == IndexRc::kOk);
|
||||
return std::make_pair(ConstIterator(leaf, slot, find), find);
|
||||
} else {
|
||||
return cend();
|
||||
return std::make_pair(cend(), false);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
typename BPlusTree<K, V, A, C, T>::Iterator BPlusTree<K, V, A, C, T>::Search(const key_type &key) {
|
||||
std::pair<typename BPlusTree<K, V, A, C, T>::Iterator, bool> BPlusTree<K, V, A, C, T>::Search(const key_type &key) {
|
||||
if (root_ != nullptr) {
|
||||
LeafNode *leaf = nullptr;
|
||||
slot_type slot;
|
||||
|
@ -316,23 +311,17 @@ typename BPlusTree<K, V, A, C, T>::Iterator BPlusTree<K, V, A, C, T>::Search(con
|
|||
// Lock the tree in S, pass the lock to Locate which will unlock it for us underneath.
|
||||
myLock->LockShared();
|
||||
IndexRc rc = Locate(myLock, false, root_, key, &leaf, &slot);
|
||||
if (rc == IndexRc::kOk) {
|
||||
// All locks from the tree to the parent of leaf are all gone. We still have a S lock
|
||||
// on the leaf. The unlock will be handled by the iterator when it goes out of scope.
|
||||
return Iterator(leaf, slot, true);
|
||||
} else {
|
||||
MS_LOG(DEBUG) << "Key not found. rc = " << static_cast<int>(rc) << ".";
|
||||
return end();
|
||||
}
|
||||
bool find = (rc == IndexRc::kOk);
|
||||
return std::make_pair(Iterator(leaf, slot, find), find);
|
||||
} else {
|
||||
return end();
|
||||
return std::make_pair(end(), false);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
typename BPlusTree<K, V, A, C, T>::value_type BPlusTree<K, V, A, C, T>::operator[](key_type key) {
|
||||
Iterator it = Search(key);
|
||||
return it.value();
|
||||
auto r = Search(key);
|
||||
return r.first.value();
|
||||
}
|
||||
|
||||
template <typename K, typename V, typename A, typename C, typename T>
|
||||
|
|
|
@ -32,13 +32,8 @@ using mindspore::LogStream;
|
|||
// For testing purposes, we will make the branching factor very low.
|
||||
struct mytraits {
|
||||
using slot_type = uint16_t;
|
||||
|
||||
static const slot_type kLeafSlots = 6;
|
||||
|
||||
static const slot_type kInnerSlots = 3;
|
||||
|
||||
static const bool kAppendMode = false;
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
@ -95,13 +90,14 @@ TEST_F(MindDataTestBPlusTree, Test1) {
|
|||
// Test search
|
||||
{
|
||||
MS_LOG(INFO) << "Locate key " << 100 << " Expect found.";
|
||||
auto it = btree.Search(100);
|
||||
EXPECT_FALSE(it == btree.end());
|
||||
auto r = btree.Search(100);
|
||||
auto &it = r.first;
|
||||
EXPECT_TRUE(r.second);
|
||||
EXPECT_EQ(it.key(), 100);
|
||||
EXPECT_EQ(it.value(), "Hello World. I am 100");
|
||||
MS_LOG(INFO) << "Locate key " << 300 << " Expect not found.";
|
||||
it = btree.Search(300);
|
||||
EXPECT_TRUE(it == btree.end());
|
||||
auto q = btree.Search(300);
|
||||
EXPECT_FALSE(q.second);
|
||||
}
|
||||
|
||||
// Test duplicate key
|
||||
|
@ -169,26 +165,18 @@ TEST_F(MindDataTestBPlusTree, Test2) {
|
|||
{
|
||||
MS_LOG(INFO) << "Locating key from 0 to 9999. Expect found.";
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
auto it = btree.Search(i);
|
||||
bool eoS = (it == btree.end());
|
||||
EXPECT_FALSE(eoS);
|
||||
if (!eoS) {
|
||||
auto r = btree.Search(i);
|
||||
EXPECT_TRUE(r.second);
|
||||
if (r.second) {
|
||||
auto &it = r.first;
|
||||
EXPECT_EQ(it.key(), i);
|
||||
std::string val = "Hello World. I am " + std::to_string(i);
|
||||
EXPECT_EQ(it.value(), val);
|
||||
}
|
||||
}
|
||||
MS_LOG(INFO) << "Locate key " << 10000 << ". Expect not found";
|
||||
auto it = btree.Search(10000);
|
||||
EXPECT_TRUE(it == btree.end());
|
||||
}
|
||||
|
||||
// Test to retrieve key at certain position.
|
||||
{
|
||||
for (int i = 0; i < 10000; i++) {
|
||||
int k = btree.KeyAtPos(i);
|
||||
EXPECT_EQ(k, i);
|
||||
}
|
||||
auto q = btree.Search(10000);
|
||||
EXPECT_FALSE(q.second);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -204,7 +192,8 @@ TEST_F(MindDataTestBPlusTree, Test3) {
|
|||
uint64_t max = ai.max_key();
|
||||
EXPECT_EQ(min, 1);
|
||||
EXPECT_EQ(max, 4);
|
||||
auto it = ai.Search(3);
|
||||
auto r = ai.Search(3);
|
||||
auto &it = r.first;
|
||||
EXPECT_EQ(it.value(), "b");
|
||||
MS_LOG(INFO) << "Dump all the values using [] operator.";
|
||||
for (uint64_t i = min; i <= max; i++) {
|
||||
|
|
Loading…
Reference in New Issue