optimize merge process (#2419)

* optimize merge process

Signed-off-by: groot <yihua.mo@zilliz.com>

* typo

Signed-off-by: groot <yihua.mo@zilliz.com>

* refine code

Signed-off-by: yhmo <yihua.mo@zilliz.com>

* drop collection issue

Signed-off-by: yhmo <yihua.mo@zilliz.com>
This commit is contained in:
groot 2020-05-25 01:19:25 -05:00 committed by GitHub
parent 2982004e2b
commit 2d9b358e16
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 37 additions and 12 deletions

View File

@ -17,14 +17,6 @@
namespace milvus {
namespace engine {
namespace {
struct {
bool
operator()(meta::SegmentSchema& left, meta::SegmentSchema& right) const {
return left.file_size_ > right.file_size_;
}
} CompareSegment;
} // namespace
Status
MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGroups& files_groups) {
@ -54,7 +46,10 @@ MergeAdaptiveStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesG
}
// arrange files by file size in descending order
std::sort(sort_files.begin(), sort_files.end(), CompareSegment);
std::sort(sort_files.begin(), sort_files.end(),
[](const meta::SegmentSchema& left, const meta::SegmentSchema& right) {
return left.file_size_ > right.file_size_;
});
// pick files to merge
int64_t index_file_size = sort_files[0].index_file_size_;

View File

@ -14,6 +14,7 @@
#include "db/meta/MetaConsts.h"
#include "utils/Log.h"
#include <algorithm>
#include <map>
#include <vector>
@ -34,13 +35,39 @@ MergeLayeredStrategy::RegroupFiles(meta::FilesHolder& files_holder, MergeFilesGr
{1UL << 30, meta::SegmentsSchema()}, // 1GB
};
meta::SegmentsSchema& files = files_holder.HoldFiles();
meta::SegmentsSchema sort_files = files_holder.HoldFiles();
// no need to merge single file
if (sort_files.size() < 2) {
return Status::OK();
}
// arrange files by file size in descending order
std::sort(sort_files.begin(), sort_files.end(),
[](const meta::SegmentSchema& left, const meta::SegmentSchema& right) {
return left.file_size_ > right.file_size_;
});
// give priority to picking files whose merged size is greater than index_file_size,
// to avoid oversized merged files, e.g. index_file_size = 1024 but merged file size = 1280
int64_t index_file_size = sort_files[0].index_file_size_;
size_t biggest_size = sort_files[0].file_size_;
for (auto iter = sort_files.end() - 1; iter != sort_files.begin() + 1; --iter) {
if ((*iter).file_size_ + biggest_size > index_file_size) {
meta::SegmentsSchema temp_group = {*sort_files.begin(), *iter};
files_groups.emplace_back(temp_group);
sort_files.erase(iter);
sort_files.erase(sort_files.begin());
break;
}
}
meta::SegmentsSchema huge_files;
// iterate from the end, because the files_holder typically returns files in order from largest to smallest
for (meta::SegmentsSchema::reverse_iterator iter = files.rbegin(); iter != files.rend(); ++iter) {
// put files to layers
for (meta::SegmentsSchema::reverse_iterator iter = sort_files.rbegin(); iter != sort_files.rend(); ++iter) {
meta::SegmentSchema& file = *iter;
if (file.index_file_size_ > 0 && file.file_size_ > (size_t)(file.index_file_size_)) {
// file that no need to merge
files_holder.UnmarkFile(file);
continue;
}

View File

@ -76,6 +76,9 @@ DropCollectionRequest::OnExecute() {
return status;
}
// step 4: flush to trigger CleanUpFilesWithTTL
status = DBWrapper::DB()->Flush();
rc.ElapseFromBegin("total cost");
} catch (std::exception& ex) {
return Status(SERVER_UNEXPECTED_ERROR, ex.what());