#1532 Search with ivf_flat failed with open-dataset: sift-256-hamming

Signed-off-by: groot <yihua.mo@zilliz.com>
This commit is contained in:
groot 2020-03-07 19:50:57 +08:00
parent a08b51c2b6
commit c2b5b00592
7 changed files with 16 additions and 9 deletions

View File

@ -40,6 +40,7 @@ Please mark all change in change log and use the issue from GitHub
- \#1521 Make cache_insert_data take effect in-service - \#1521 Make cache_insert_data take effect in-service
- \#1525 Add setter API for config preload_table - \#1525 Add setter API for config preload_table
- \#1530 Set table file with correct engine type in meta - \#1530 Set table file with correct engine type in meta
- \#1532 Search with ivf_flat failed with open-dataset: sift-256-hamming
- \#1535 Degradation searching performance with metric_type: binary_idmap - \#1535 Degradation searching performance with metric_type: binary_idmap
## Feature ## Feature

View File

@ -812,7 +812,7 @@ DBImpl::CompactFile(const std::string& table_id, const meta::TableFileSchema& fi
// Update table files state // Update table files state
// if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size // if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size
// else set file type to RAW, no need to build index // else set file type to RAW, no need to build index
if (compacted_file.engine_type_ != (int)EngineType::FAISS_IDMAP) { if (!utils::IsRawIndexType(compacted_file.engine_type_)) {
compacted_file.file_type_ = (segment_writer_ptr->Size() >= compacted_file.index_file_size_) compacted_file.file_type_ = (segment_writer_ptr->Size() >= compacted_file.index_file_size_)
? meta::TableFileSchema::TO_INDEX ? meta::TableFileSchema::TO_INDEX
: meta::TableFileSchema::RAW; : meta::TableFileSchema::RAW;
@ -1468,7 +1468,7 @@ DBImpl::MergeFiles(const std::string& table_id, const meta::TableFilesSchema& fi
// step 4: update table files state // step 4: update table files state
// if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size // if index type isn't IDMAP, set file type to TO_INDEX if file size exceed index_file_size
// else set file type to RAW, no need to build index // else set file type to RAW, no need to build index
if (table_file.engine_type_ != (int)EngineType::FAISS_IDMAP) { if (!utils::IsRawIndexType(table_file.engine_type_)) {
table_file.file_type_ = (segment_writer_ptr->Size() >= table_file.index_file_size_) table_file.file_type_ = (segment_writer_ptr->Size() >= table_file.index_file_size_)
? meta::TableFileSchema::TO_INDEX ? meta::TableFileSchema::TO_INDEX
: meta::TableFileSchema::RAW; : meta::TableFileSchema::RAW;
@ -1770,7 +1770,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
// for IDMAP type, only wait all NEW file converted to RAW file // for IDMAP type, only wait all NEW file converted to RAW file
// for other type, wait NEW/RAW/NEW_MERGE/NEW_INDEX/TO_INDEX files converted to INDEX files // for other type, wait NEW/RAW/NEW_MERGE/NEW_INDEX/TO_INDEX files converted to INDEX files
std::vector<int> file_types; std::vector<int> file_types;
if (index.engine_type_ == static_cast<int32_t>(EngineType::FAISS_IDMAP)) { if (utils::IsRawIndexType(index.engine_type_)) {
file_types = { file_types = {
static_cast<int32_t>(meta::TableFileSchema::NEW), static_cast<int32_t>(meta::TableFileSchema::NEW),
static_cast<int32_t>(meta::TableFileSchema::NEW_MERGE), static_cast<int32_t>(meta::TableFileSchema::NEW_MERGE),
@ -1792,7 +1792,7 @@ DBImpl::BuildTableIndexRecursively(const std::string& table_id, const TableIndex
while (!table_files.empty()) { while (!table_files.empty()) {
ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times; ENGINE_LOG_DEBUG << "Non index files detected! Will build index " << times;
if (index.engine_type_ != (int)EngineType::FAISS_IDMAP) { if (!utils::IsRawIndexType(index.engine_type_)) {
status = meta_ptr_->UpdateTableFilesToIndex(table_id); status = meta_ptr_->UpdateTableFilesToIndex(table_id);
} }

View File

@ -215,6 +215,11 @@ IsSameIndex(const TableIndex& index1, const TableIndex& index2) {
index1.metric_type_ == index2.metric_type_; index1.metric_type_ == index2.metric_type_;
} }
// Reports whether `type` (an EngineType value carried as int32_t) is one of the
// raw index engine types: FAISS_IDMAP or FAISS_BIN_IDMAP.
bool
IsRawIndexType(int32_t type) {
    const auto idmap_type = static_cast<int32_t>(EngineType::FAISS_IDMAP);
    const auto bin_idmap_type = static_cast<int32_t>(EngineType::FAISS_BIN_IDMAP);
    return type == idmap_type || type == bin_idmap_type;
}
meta::DateT meta::DateT
GetDate(const std::time_t& t, int day_delta) { GetDate(const std::time_t& t, int day_delta) {
struct tm ltm; struct tm ltm;

View File

@ -45,6 +45,9 @@ GetParentPath(const std::string& path, std::string& parent_path);
bool bool
IsSameIndex(const TableIndex& index1, const TableIndex& index2); IsSameIndex(const TableIndex& index1, const TableIndex& index2);
// Returns true when `type` equals EngineType::FAISS_IDMAP or
// EngineType::FAISS_BIN_IDMAP (compared as int32_t), false otherwise.
bool
IsRawIndexType(int32_t type);
meta::DateT meta::DateT
GetDate(const std::time_t& t, int day_delta = 0); GetDate(const std::time_t& t, int day_delta = 0);
meta::DateT meta::DateT

View File

@ -406,7 +406,7 @@ ExecutionEngineImpl::Load(bool to_cache) {
utils::GetParentPath(location_, segment_dir); utils::GetParentPath(location_, segment_dir);
auto segment_reader_ptr = std::make_shared<segment::SegmentReader>(segment_dir); auto segment_reader_ptr = std::make_shared<segment::SegmentReader>(segment_dir);
if (index_type_ == EngineType::FAISS_IDMAP || index_type_ == EngineType::FAISS_BIN_IDMAP) { if (utils::IsRawIndexType((int32_t)index_type_)) {
index_ = index_type_ == EngineType::FAISS_IDMAP ? GetVecIndexFactory(IndexType::FAISS_IDMAP) index_ = index_type_ == EngineType::FAISS_IDMAP ? GetVecIndexFactory(IndexType::FAISS_IDMAP)
: GetVecIndexFactory(IndexType::FAISS_BIN_IDMAP); : GetVecIndexFactory(IndexType::FAISS_BIN_IDMAP);
milvus::json conf{{knowhere::meta::DEVICEID, gpu_num_}, {knowhere::meta::DIM, dim_}}; milvus::json conf{{knowhere::meta::DEVICEID, gpu_num_}, {knowhere::meta::DIM, dim_}};

View File

@ -2086,8 +2086,7 @@ MySQLMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/)
// If we are deleting a raw table file, it means it's okay to delete the entire segment directory. // If we are deleting a raw table file, it means it's okay to delete the entire segment directory.
// Else, we can only delete the single file // Else, we can only delete the single file
// TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky // TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky
if (table_file.engine_type_ == (int32_t)EngineType::FAISS_IDMAP || if (utils::IsRawIndexType(table_file.engine_type_)) {
table_file.engine_type_ == (int32_t)EngineType::FAISS_BIN_IDMAP) {
utils::DeleteSegment(options_, table_file); utils::DeleteSegment(options_, table_file);
std::string segment_dir; std::string segment_dir;
utils::GetParentPath(table_file.location_, segment_dir); utils::GetParentPath(table_file.location_, segment_dir);

View File

@ -1425,8 +1425,7 @@ SqliteMetaImpl::CleanUpFilesWithTTL(uint64_t seconds /*, CleanUpFilter* filter*/
// If we are deleting a raw table file, it means it's okay to delete the entire segment directory. // If we are deleting a raw table file, it means it's okay to delete the entire segment directory.
// Else, we can only delete the single file // Else, we can only delete the single file
// TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky // TODO(zhiru): We determine whether a table file is raw by its engine type. This is a bit hacky
if (table_file.engine_type_ == (int32_t)EngineType::FAISS_IDMAP || if (utils::IsRawIndexType(table_file.engine_type_)) {
table_file.engine_type_ == (int32_t)EngineType::FAISS_BIN_IDMAP) {
utils::DeleteSegment(options_, table_file); utils::DeleteSegment(options_, table_file);
std::string segment_dir; std::string segment_dir;
utils::GetParentPath(table_file.location_, segment_dir); utils::GetParentPath(table_file.location_, segment_dir);