mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-11-30 02:48:45 +08:00
replace loaded binlog with binlog index for search performance (#27673)
Signed-off-by: cqy123456 <qianya.cheng@zilliz.com>
This commit is contained in:
parent
e88212ba4b
commit
4fbe3c9142
@ -265,10 +265,10 @@ queryNode:
|
||||
# This parameter is only useful when enable-disk = true.
|
||||
# And this value should be a number greater than 1 and less than 32.
|
||||
chunkRows: 1024 # The number of vectors in a chunk.
|
||||
growing: # growing a vector index for growing segment to accelerate search
|
||||
interimIndex: # build a vector temperate index for growing segment or binlog to accelerate search
|
||||
enableIndex: true
|
||||
nlist: 128 # growing segment index nlist
|
||||
nprobe: 16 # nprobe to search growing segment, based on your accuracy requirement, must smaller than nlist
|
||||
nlist: 128 # segment index nlist
|
||||
nprobe: 16 # nprobe to search segment, based on your accuracy requirement, must smaller than nlist
|
||||
loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
|
||||
enableDisk: false # enable querynode load disk index, and search on disk index
|
||||
maxDiskUsagePercentage: 95
|
||||
|
@ -31,8 +31,10 @@ VectorFieldIndexing::VectorFieldIndexing(const FieldMeta& field_meta,
|
||||
: FieldIndexing(field_meta, segcore_config),
|
||||
build(false),
|
||||
sync_with_index(false),
|
||||
config_(std::make_unique<VecIndexConfig>(
|
||||
segment_max_row_count, field_index_meta, segcore_config)) {
|
||||
config_(std::make_unique<VecIndexConfig>(segment_max_row_count,
|
||||
field_index_meta,
|
||||
segcore_config,
|
||||
SegmentType::Growing)) {
|
||||
index_ = std::make_unique<index::VectorMemIndex>(
|
||||
config_->GetIndexType(),
|
||||
config_->GetMetricType(),
|
||||
|
@ -243,7 +243,7 @@ class IndexingRecord {
|
||||
for (auto& [field_id, field_meta] : schema_.get_fields()) {
|
||||
++offset_id;
|
||||
if (field_meta.is_vector() &&
|
||||
segcore_config_.get_enable_growing_segment_index()) {
|
||||
segcore_config_.get_enable_interim_segment_index()) {
|
||||
// TODO: skip binary small index now, reenable after config.yaml is ready
|
||||
if (field_meta.get_data_type() == DataType::VECTOR_BINARY) {
|
||||
continue;
|
||||
|
@ -13,15 +13,15 @@
|
||||
#include "log/Log.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
VecIndexConfig::VecIndexConfig(const int64_t max_index_row_cout,
|
||||
const FieldIndexMeta& index_meta_,
|
||||
const SegcoreConfig& config)
|
||||
const SegcoreConfig& config,
|
||||
const SegmentType& segment_type)
|
||||
: max_index_row_count_(max_index_row_cout), config_(config) {
|
||||
origin_index_type_ = index_meta_.GetIndexType();
|
||||
metric_type_ = index_meta_.GeMetricType();
|
||||
|
||||
index_type_ = support_index_types[0];
|
||||
index_type_ = support_index_types.at(segment_type);
|
||||
build_params_[knowhere::meta::METRIC_TYPE] = metric_type_;
|
||||
build_params_[knowhere::indexparam::NLIST] =
|
||||
std::to_string(config_.get_nlist());
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "knowhere/config.h"
|
||||
#include "SegcoreConfig.h"
|
||||
#include "common/QueryInfo.h"
|
||||
#include "common/type_c.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -27,8 +28,9 @@ enum class IndexConfigLevel {
|
||||
};
|
||||
|
||||
class VecIndexConfig {
|
||||
inline static const std::vector<std::string> support_index_types = {
|
||||
knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC};
|
||||
inline static const std::map<SegmentType, std::string> support_index_types =
|
||||
{{SegmentType::Growing, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC},
|
||||
{SegmentType::Sealed, knowhere::IndexEnum::INDEX_FAISS_IVFFLAT}};
|
||||
|
||||
inline static const std::map<std::string, double> index_build_ratio = {
|
||||
{knowhere::IndexEnum::INDEX_FAISS_IVFFLAT_CC, 0.1}};
|
||||
@ -39,7 +41,8 @@ class VecIndexConfig {
|
||||
public:
|
||||
VecIndexConfig(const int64_t max_index_row_count,
|
||||
const FieldIndexMeta& index_meta_,
|
||||
const SegcoreConfig& config);
|
||||
const SegcoreConfig& config,
|
||||
const SegmentType& segment_type);
|
||||
|
||||
int64_t
|
||||
GetBuildThreshold() const noexcept;
|
||||
|
@ -64,20 +64,20 @@ class SegcoreConfig {
|
||||
}
|
||||
|
||||
void
|
||||
set_enable_growing_segment_index(bool enable_growing_segment_index) {
|
||||
enable_growing_segment_index_ = enable_growing_segment_index;
|
||||
set_enable_interim_segment_index(bool enable_interim_segment_index) {
|
||||
this->enable_interim_segment_index_ = enable_interim_segment_index;
|
||||
}
|
||||
|
||||
bool
|
||||
get_enable_growing_segment_index() const {
|
||||
return enable_growing_segment_index_;
|
||||
get_enable_interim_segment_index() const {
|
||||
return enable_interim_segment_index_;
|
||||
}
|
||||
|
||||
private:
|
||||
bool enable_growing_segment_index_ = false;
|
||||
int64_t chunk_rows_ = 32 * 1024;
|
||||
int64_t nlist_ = 100;
|
||||
int64_t nprobe_ = 4;
|
||||
inline static bool enable_interim_segment_index_ = false;
|
||||
inline static int64_t chunk_rows_ = 32 * 1024;
|
||||
inline static int64_t nlist_ = 100;
|
||||
inline static int64_t nprobe_ = 4;
|
||||
};
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
@ -112,7 +112,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_offset,
|
||||
field_meta);
|
||||
}
|
||||
//insert vector data into index
|
||||
if (segcore_config_.get_enable_growing_segment_index()) {
|
||||
if (segcore_config_.get_enable_interim_segment_index()) {
|
||||
indexing_record_.AppendingIndex(
|
||||
reserved_offset,
|
||||
num_rows,
|
||||
@ -195,7 +195,7 @@ SegmentGrowingImpl::LoadFieldData(const LoadFieldDataInfo& infos) {
|
||||
insert_record_.get_field_data_base(field_id)->set_data_raw(
|
||||
reserved_offset, field_data);
|
||||
}
|
||||
if (segcore_config_.get_enable_growing_segment_index()) {
|
||||
if (segcore_config_.get_enable_interim_segment_index()) {
|
||||
auto offset = reserved_offset;
|
||||
for (auto& data : field_data) {
|
||||
auto row_count = data->get_num_rows();
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include "storage/ChunkCacheSingleton.h"
|
||||
#include "common/File.h"
|
||||
#include "common/Tracer.h"
|
||||
#include "index/VectorMemIndex.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
@ -98,16 +99,19 @@ SegmentSealedImpl::LoadVecIndex(const LoadIndexInfo& info) {
|
||||
std::to_string(num_rows_.value()) + ")");
|
||||
}
|
||||
AssertInfo(!vector_indexings_.is_ready(field_id), "vec index is not ready");
|
||||
if (get_bit(field_data_ready_bitset_, field_id)) {
|
||||
fields_.erase(field_id);
|
||||
set_bit(field_data_ready_bitset_, field_id, false);
|
||||
} else if (get_bit(binlog_index_bitset_, field_id)) {
|
||||
set_bit(binlog_index_bitset_, field_id, false);
|
||||
vector_indexings_.drop_field_indexing(field_id);
|
||||
}
|
||||
update_row_count(row_count);
|
||||
vector_indexings_.append_field_indexing(
|
||||
field_id,
|
||||
metric_type,
|
||||
std::move(const_cast<LoadIndexInfo&>(info).index));
|
||||
|
||||
set_bit(index_ready_bitset_, field_id, true);
|
||||
update_row_count(row_count);
|
||||
// release field column
|
||||
fields_.erase(field_id);
|
||||
set_bit(field_data_ready_bitset_, field_id, false);
|
||||
}
|
||||
|
||||
void
|
||||
@ -366,11 +370,29 @@ SegmentSealedImpl::LoadFieldData(FieldId field_id, FieldDataInfo& data) {
|
||||
insert_record_.seal_pks();
|
||||
}
|
||||
|
||||
std::unique_lock lck(mutex_);
|
||||
set_bit(field_data_ready_bitset_, field_id, true);
|
||||
bool use_temp_index = false;
|
||||
{
|
||||
// update num_rows to build temperate binlog index
|
||||
std::unique_lock lck(mutex_);
|
||||
update_row_count(num_rows);
|
||||
}
|
||||
|
||||
if (generate_binlog_index(field_id)) {
|
||||
std::unique_lock lck(mutex_);
|
||||
fields_.erase(field_id);
|
||||
set_bit(field_data_ready_bitset_, field_id, false);
|
||||
use_temp_index = true;
|
||||
}
|
||||
|
||||
if (!use_temp_index) {
|
||||
std::unique_lock lck(mutex_);
|
||||
set_bit(field_data_ready_bitset_, field_id, true);
|
||||
}
|
||||
}
|
||||
{
|
||||
std::unique_lock lck(mutex_);
|
||||
update_row_count(num_rows);
|
||||
}
|
||||
std::unique_lock lck(mutex_);
|
||||
update_row_count(num_rows);
|
||||
}
|
||||
|
||||
void
|
||||
@ -609,7 +631,26 @@ SegmentSealedImpl::vector_search(SearchInfo& search_info,
|
||||
|
||||
AssertInfo(field_meta.is_vector(),
|
||||
"The meta type of vector field is not vector type");
|
||||
if (get_bit(index_ready_bitset_, field_id)) {
|
||||
if (get_bit(binlog_index_bitset_, field_id)) {
|
||||
AssertInfo(
|
||||
vec_binlog_config_.find(field_id) != vec_binlog_config_.end(),
|
||||
"The binlog params is not generate.");
|
||||
auto binlog_search_info =
|
||||
vec_binlog_config_.at(field_id)->GetSearchConf(search_info);
|
||||
|
||||
AssertInfo(vector_indexings_.is_ready(field_id),
|
||||
"vector indexes isn't ready for field " +
|
||||
std::to_string(field_id.get()));
|
||||
query::SearchOnSealedIndex(*schema_,
|
||||
vector_indexings_,
|
||||
binlog_search_info,
|
||||
query_data,
|
||||
query_count,
|
||||
bitset,
|
||||
output);
|
||||
milvus::tracer::AddEvent(
|
||||
"finish_searching_vector_temperate_binlog_index");
|
||||
} else if (get_bit(index_ready_bitset_, field_id)) {
|
||||
AssertInfo(vector_indexings_.is_ready(field_id),
|
||||
"vector indexes isn't ready for field " +
|
||||
std::to_string(field_id.get()));
|
||||
@ -676,7 +717,8 @@ SegmentSealedImpl::get_vector(FieldId field_id,
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
AssertInfo(field_meta.is_vector(), "vector field is not vector type");
|
||||
|
||||
if (!get_bit(index_ready_bitset_, field_id)) {
|
||||
if (!get_bit(index_ready_bitset_, field_id) &&
|
||||
!get_bit(binlog_index_bitset_, field_id)) {
|
||||
return fill_with_empty(field_id, count);
|
||||
}
|
||||
|
||||
@ -771,8 +813,14 @@ SegmentSealedImpl::DropFieldData(const FieldId field_id) {
|
||||
} else {
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
std::unique_lock lck(mutex_);
|
||||
set_bit(field_data_ready_bitset_, field_id, false);
|
||||
insert_record_.drop_field_data(field_id);
|
||||
if (get_bit(field_data_ready_bitset_, field_id)) {
|
||||
set_bit(field_data_ready_bitset_, field_id, false);
|
||||
insert_record_.drop_field_data(field_id);
|
||||
}
|
||||
if (get_bit(binlog_index_bitset_, field_id)) {
|
||||
set_bit(binlog_index_bitset_, field_id, false);
|
||||
vector_indexings_.drop_field_indexing(field_id);
|
||||
}
|
||||
lck.unlock();
|
||||
}
|
||||
}
|
||||
@ -807,7 +855,8 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const {
|
||||
}
|
||||
|
||||
auto& request_fields = plan->extra_info_opt_.value().involved_fields_;
|
||||
auto field_ready_bitset = field_data_ready_bitset_ | index_ready_bitset_;
|
||||
auto field_ready_bitset =
|
||||
field_data_ready_bitset_ | index_ready_bitset_ | binlog_index_bitset_;
|
||||
AssertInfo(request_fields.size() == field_ready_bitset.size(),
|
||||
"Request fields size not equal to field ready bitset size when "
|
||||
"check search");
|
||||
@ -823,13 +872,19 @@ SegmentSealedImpl::check_search(const query::Plan* plan) const {
|
||||
}
|
||||
}
|
||||
|
||||
SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema, int64_t segment_id)
|
||||
: field_data_ready_bitset_(schema->size()),
|
||||
SegmentSealedImpl::SegmentSealedImpl(SchemaPtr schema,
|
||||
IndexMetaPtr index_meta,
|
||||
const SegcoreConfig& segcore_config,
|
||||
int64_t segment_id)
|
||||
: segcore_config_(segcore_config),
|
||||
field_data_ready_bitset_(schema->size()),
|
||||
index_ready_bitset_(schema->size()),
|
||||
binlog_index_bitset_(schema->size()),
|
||||
scalar_indexings_(schema->size()),
|
||||
insert_record_(*schema, MAX_ROW_COUNT),
|
||||
schema_(schema),
|
||||
id_(segment_id) {
|
||||
id_(segment_id),
|
||||
col_index_meta_(index_meta) {
|
||||
}
|
||||
|
||||
SegmentSealedImpl::~SegmentSealedImpl() {
|
||||
@ -1081,7 +1136,8 @@ SegmentSealedImpl::bulk_subscript(FieldId field_id,
|
||||
bool
|
||||
SegmentSealedImpl::HasIndex(FieldId field_id) const {
|
||||
std::shared_lock lck(mutex_);
|
||||
return get_bit(index_ready_bitset_, field_id);
|
||||
return get_bit(index_ready_bitset_, field_id) |
|
||||
get_bit(binlog_index_bitset_, field_id);
|
||||
}
|
||||
|
||||
bool
|
||||
@ -1100,7 +1156,8 @@ SegmentSealedImpl::HasRawData(int64_t field_id) const {
|
||||
auto fieldID = FieldId(field_id);
|
||||
const auto& field_meta = schema_->operator[](fieldID);
|
||||
if (datatype_is_vector(field_meta.get_data_type())) {
|
||||
if (get_bit(index_ready_bitset_, fieldID)) {
|
||||
if (get_bit(index_ready_bitset_, fieldID) |
|
||||
get_bit(binlog_index_bitset_, fieldID)) {
|
||||
AssertInfo(vector_indexings_.is_ready(fieldID),
|
||||
"vector index is not ready");
|
||||
auto field_indexing = vector_indexings_.get_field_indexing(fieldID);
|
||||
@ -1249,4 +1306,62 @@ SegmentSealedImpl::mask_with_timestamps(BitsetType& bitset_chunk,
|
||||
bitset_chunk |= mask;
|
||||
}
|
||||
|
||||
bool
|
||||
SegmentSealedImpl::generate_binlog_index(const FieldId field_id) {
|
||||
if (col_index_meta_ == nullptr)
|
||||
return false;
|
||||
auto& field_meta = schema_->operator[](field_id);
|
||||
|
||||
if (field_meta.is_vector() &&
|
||||
field_meta.get_data_type() == DataType::VECTOR_FLOAT &&
|
||||
segcore_config_.get_enable_interim_segment_index()) {
|
||||
try {
|
||||
auto& field_index_meta =
|
||||
col_index_meta_->GetFieldIndexMeta(field_id);
|
||||
auto& index_params = field_index_meta.GetIndexParams();
|
||||
if (index_params.find(knowhere::meta::INDEX_TYPE) ==
|
||||
index_params.end() ||
|
||||
index_params.at(knowhere::meta::INDEX_TYPE) ==
|
||||
knowhere::IndexEnum::INDEX_FAISS_IDMAP) {
|
||||
return false;
|
||||
}
|
||||
// get binlog data and meta
|
||||
auto row_count = num_rows_.value();
|
||||
auto dim = field_meta.get_dim();
|
||||
auto vec_data = fields_.at(field_id);
|
||||
auto dataset =
|
||||
knowhere::GenDataSet(row_count, dim, (void*)vec_data->Data());
|
||||
dataset->SetIsOwner(false);
|
||||
// generate index params
|
||||
auto field_binlog_config = std::unique_ptr<VecIndexConfig>(
|
||||
new VecIndexConfig(row_count,
|
||||
field_index_meta,
|
||||
segcore_config_,
|
||||
SegmentType::Sealed));
|
||||
auto build_config = field_binlog_config->GetBuildBaseParams();
|
||||
build_config[knowhere::meta::DIM] = std::to_string(dim);
|
||||
build_config[knowhere::meta::NUM_BUILD_THREAD] = std::to_string(1);
|
||||
auto index_metric = field_binlog_config->GetMetricType();
|
||||
|
||||
index::IndexBasePtr vec_index =
|
||||
std::make_unique<index::VectorMemIndex>(
|
||||
field_binlog_config->GetIndexType(),
|
||||
index_metric,
|
||||
knowhere::Version::GetCurrentVersion().VersionNumber());
|
||||
vec_index->BuildWithDataset(dataset, build_config);
|
||||
vector_indexings_.append_field_indexing(
|
||||
field_id, index_metric, std::move(vec_index));
|
||||
|
||||
vec_binlog_config_[field_id] = std::move(field_binlog_config);
|
||||
set_bit(binlog_index_bitset_, field_id, true);
|
||||
|
||||
return true;
|
||||
} catch (std::exception& e) {
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
@ -33,12 +33,16 @@
|
||||
#include "index/ScalarIndex.h"
|
||||
#include "sys/mman.h"
|
||||
#include "common/Types.h"
|
||||
#include "common/IndexMeta.h"
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
class SegmentSealedImpl : public SegmentSealed {
|
||||
public:
|
||||
explicit SegmentSealedImpl(SchemaPtr schema, int64_t segment_id);
|
||||
explicit SegmentSealedImpl(SchemaPtr schema,
|
||||
IndexMetaPtr index_meta,
|
||||
const SegcoreConfig& segcore_config,
|
||||
int64_t segment_id);
|
||||
~SegmentSealedImpl() override;
|
||||
void
|
||||
LoadIndex(const LoadIndexInfo& info) override;
|
||||
@ -240,10 +244,14 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
void
|
||||
LoadScalarIndex(const LoadIndexInfo& info);
|
||||
|
||||
bool
|
||||
generate_binlog_index(const FieldId field_id);
|
||||
|
||||
private:
|
||||
// segment loading state
|
||||
BitsetType field_data_ready_bitset_;
|
||||
BitsetType index_ready_bitset_;
|
||||
BitsetType binlog_index_bitset_;
|
||||
std::atomic<int> system_ready_count_ = 0;
|
||||
// segment data
|
||||
|
||||
@ -266,11 +274,22 @@ class SegmentSealedImpl : public SegmentSealed {
|
||||
SchemaPtr schema_;
|
||||
int64_t id_;
|
||||
std::unordered_map<FieldId, std::shared_ptr<ColumnBase>> fields_;
|
||||
|
||||
// only useful in binlog
|
||||
IndexMetaPtr col_index_meta_;
|
||||
SegcoreConfig segcore_config_;
|
||||
std::unordered_map<FieldId, std::unique_ptr<VecIndexConfig>>
|
||||
vec_binlog_config_;
|
||||
};
|
||||
|
||||
inline SegmentSealedPtr
|
||||
CreateSealedSegment(SchemaPtr schema, int64_t segment_id = -1) {
|
||||
return std::make_unique<SegmentSealedImpl>(schema, segment_id);
|
||||
CreateSealedSegment(
|
||||
SchemaPtr schema,
|
||||
IndexMetaPtr index_meta = nullptr,
|
||||
int64_t segment_id = -1,
|
||||
const SegcoreConfig& segcore_config = SegcoreConfig::default_config()) {
|
||||
return std::make_unique<SegmentSealedImpl>(
|
||||
schema, index_meta, segcore_config, segment_id);
|
||||
}
|
||||
|
||||
} // namespace milvus::segcore
|
||||
|
@ -32,10 +32,10 @@ SegcoreSetChunkRows(const int64_t value) {
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
SegcoreSetEnableGrowingSegmentIndex(const bool value) {
|
||||
SegcoreSetEnableTempSegmentIndex(const bool value) {
|
||||
milvus::segcore::SegcoreConfig& config =
|
||||
milvus::segcore::SegcoreConfig::default_config();
|
||||
config.set_enable_growing_segment_index(value);
|
||||
config.set_enable_interim_segment_index(value);
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
|
@ -22,7 +22,7 @@ void
|
||||
SegcoreSetChunkRows(const int64_t);
|
||||
|
||||
void
|
||||
SegcoreSetEnableGrowingSegmentIndex(const bool);
|
||||
SegcoreSetEnableTempSegmentIndex(const bool);
|
||||
|
||||
void
|
||||
SegcoreSetNlist(const int64_t);
|
||||
|
@ -41,8 +41,8 @@ NewSegment(CCollection collection, SegmentType seg_type, int64_t segment_id) {
|
||||
}
|
||||
case Sealed:
|
||||
case Indexing:
|
||||
segment = milvus::segcore::CreateSealedSegment(col->get_schema(),
|
||||
segment_id);
|
||||
segment = milvus::segcore::CreateSealedSegment(
|
||||
col->get_schema(), col->GetIndexMeta(), segment_id);
|
||||
break;
|
||||
default:
|
||||
LOG_SEGCORE_ERROR_ << "invalid segment type "
|
||||
|
@ -57,6 +57,7 @@ set(MILVUS_TEST_FILES
|
||||
test_always_true_expr.cpp
|
||||
test_plan_proto.cpp
|
||||
test_chunk_cache.cpp
|
||||
test_binlog_index.cpp
|
||||
)
|
||||
|
||||
if ( BUILD_DISK_ANN STREQUAL "ON" )
|
||||
|
@ -75,7 +75,7 @@ Search_GrowingIndex(benchmark::State& state) {
|
||||
FieldIndexMeta fieldIndexMeta(schema->get_field_id(FieldName("fakevec")),
|
||||
std::move(index_params),
|
||||
std::move(type_params));
|
||||
segconf.set_enable_growing_segment_index(true);
|
||||
segconf.set_enable_interim_segment_index(true);
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {
|
||||
{schema->get_field_id(FieldName("fakevec")), fieldIndexMeta}};
|
||||
IndexMetaPtr metaPtr =
|
||||
|
326
internal/core/unittest/test_binlog_index.cpp
Normal file
326
internal/core/unittest/test_binlog_index.cpp
Normal file
@ -0,0 +1,326 @@
|
||||
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
||||
// with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
||||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
||||
// or implied. See the License for the specific language governing permissions and limitations under the License
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <boost/format.hpp>
|
||||
#include <regex>
|
||||
|
||||
#include "pb/plan.pb.h"
|
||||
#include "segcore/segcore_init_c.h"
|
||||
#include "segcore/SegmentSealed.h"
|
||||
#include "segcore/SegmentSealedImpl.h"
|
||||
#include "pb/schema.pb.h"
|
||||
#include "test_utils/DataGen.h"
|
||||
#include "index/IndexFactory.h"
|
||||
#include "query/Plan.h"
|
||||
#include "knowhere/comp/brute_force.h"
|
||||
|
||||
using namespace milvus::segcore;
|
||||
using namespace milvus;
|
||||
namespace pb = milvus::proto;
|
||||
|
||||
std::shared_ptr<float[]>
|
||||
GenRandomFloatVecData(int rows, int dim, int seed = 42) {
|
||||
std::shared_ptr<float[]> vecs =
|
||||
std::shared_ptr<float[]>(new float[rows * dim]);
|
||||
std::mt19937 rng(seed);
|
||||
std::uniform_int_distribution<> distrib(0.0, 100.0);
|
||||
for (int i = 0; i < rows * dim; ++i) vecs[i] = (float)distrib(rng);
|
||||
return std::move(vecs);
|
||||
}
|
||||
|
||||
inline float
|
||||
GetKnnSearchRecall(
|
||||
size_t nq, int64_t* gt_ids, size_t gt_k, int64_t* res_ids, size_t res_k) {
|
||||
uint32_t matched_num = 0;
|
||||
for (auto i = 0; i < nq; ++i) {
|
||||
std::vector<int64_t> ids_0(gt_ids + i * gt_k,
|
||||
gt_ids + i * gt_k + res_k);
|
||||
std::vector<int64_t> ids_1(res_ids + i * res_k,
|
||||
res_ids + i * res_k + res_k);
|
||||
|
||||
std::sort(ids_0.begin(), ids_0.end());
|
||||
std::sort(ids_1.begin(), ids_1.end());
|
||||
|
||||
std::vector<int64_t> v(std::max(ids_0.size(), ids_1.size()));
|
||||
std::vector<int64_t>::iterator it;
|
||||
it = std::set_intersection(
|
||||
ids_0.begin(), ids_0.end(), ids_1.begin(), ids_1.end(), v.begin());
|
||||
v.resize(it - v.begin());
|
||||
matched_num += v.size();
|
||||
}
|
||||
return ((float)matched_num) / ((float)nq * res_k);
|
||||
}
|
||||
|
||||
using Param = const char*;
|
||||
class BinlogIndexTest : public ::testing::TestWithParam<Param> {
|
||||
void
|
||||
SetUp() override {
|
||||
auto param = GetParam();
|
||||
metricType = param;
|
||||
|
||||
schema = std::make_shared<Schema>();
|
||||
|
||||
auto metric_type = metricType;
|
||||
vec_field_id = schema->AddDebugField(
|
||||
"fakevec", DataType::VECTOR_FLOAT, data_d, metric_type);
|
||||
auto i64_fid = schema->AddDebugField("counter", DataType::INT64);
|
||||
schema->set_primary_field_id(i64_fid);
|
||||
|
||||
// generate vector field data
|
||||
vec_data = GenRandomFloatVecData(data_n, data_d);
|
||||
|
||||
vec_field_data =
|
||||
storage::CreateFieldData(DataType::VECTOR_FLOAT, data_d);
|
||||
vec_field_data->FillFieldData(vec_data.get(), data_n);
|
||||
}
|
||||
|
||||
public:
|
||||
IndexMetaPtr
|
||||
GetCollectionIndexMeta(std::string index_type) {
|
||||
std::map<std::string, std::string> index_params = {
|
||||
{"index_type", index_type},
|
||||
{"metric_type", metricType},
|
||||
{"nlist", "1024"}};
|
||||
std::map<std::string, std::string> type_params = {{"dim", "128"}};
|
||||
FieldIndexMeta fieldIndexMeta(
|
||||
vec_field_id, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
config.set_chunk_rows(1024);
|
||||
config.set_enable_interim_segment_index(true);
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {
|
||||
{vec_field_id, fieldIndexMeta}};
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(226985, std::move(filedMap));
|
||||
return std::move(metaPtr);
|
||||
}
|
||||
|
||||
void
|
||||
LoadOtherFields() {
|
||||
auto dataset = DataGen(schema, data_n);
|
||||
// load id
|
||||
LoadFieldDataInfo row_id_info;
|
||||
FieldMeta row_id_field_meta(
|
||||
FieldName("RowID"), RowFieldID, DataType::INT64);
|
||||
auto field_data = std::make_shared<milvus::storage::FieldData<int64_t>>(
|
||||
DataType::INT64);
|
||||
field_data->FillFieldData(dataset.row_ids_.data(), data_n);
|
||||
auto field_data_info =
|
||||
FieldDataInfo{RowFieldID.get(),
|
||||
data_n,
|
||||
std::vector<storage::FieldDataPtr>{field_data}};
|
||||
segment->LoadFieldData(RowFieldID, field_data_info);
|
||||
// load ts
|
||||
LoadFieldDataInfo ts_info;
|
||||
FieldMeta ts_field_meta(
|
||||
FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
|
||||
field_data = std::make_shared<milvus::storage::FieldData<int64_t>>(
|
||||
DataType::INT64);
|
||||
field_data->FillFieldData(dataset.timestamps_.data(), data_n);
|
||||
field_data_info =
|
||||
FieldDataInfo{TimestampFieldID.get(),
|
||||
data_n,
|
||||
std::vector<storage::FieldDataPtr>{field_data}};
|
||||
segment->LoadFieldData(TimestampFieldID, field_data_info);
|
||||
}
|
||||
|
||||
protected:
|
||||
milvus::SchemaPtr schema;
|
||||
const char* metricType;
|
||||
size_t data_n = 10000;
|
||||
size_t data_d = 128;
|
||||
size_t topk = 10;
|
||||
milvus::storage::FieldDataPtr vec_field_data = nullptr;
|
||||
milvus::segcore::SegmentSealedPtr segment = nullptr;
|
||||
milvus::FieldId vec_field_id;
|
||||
std::shared_ptr<float[]> vec_data;
|
||||
};
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(MetricTypeParameters,
|
||||
BinlogIndexTest,
|
||||
::testing::Values(knowhere::metric::L2));
|
||||
|
||||
TEST_P(BinlogIndexTest, Accuracy) {
|
||||
IndexMetaPtr collection_index_meta =
|
||||
GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
|
||||
|
||||
segment = CreateSealedSegment(schema, collection_index_meta);
|
||||
LoadOtherFields();
|
||||
|
||||
auto& segcore_config = milvus::segcore::SegcoreConfig::default_config();
|
||||
segcore_config.set_enable_interim_segment_index(true);
|
||||
segcore_config.set_nprobe(32);
|
||||
// 1. load field data, and build binlog index for binlog data
|
||||
auto field_data_info =
|
||||
FieldDataInfo{vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<storage::FieldDataPtr>{vec_field_data}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
//assert segment has been built binlog index
|
||||
EXPECT_TRUE(segment->HasIndex(vec_field_id));
|
||||
EXPECT_EQ(segment->get_row_count(), data_n);
|
||||
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
|
||||
|
||||
// 2. search binlog index
|
||||
auto num_queries = 10;
|
||||
auto query_ptr = GenRandomFloatVecData(num_queries, data_d);
|
||||
|
||||
milvus::proto::plan::PlanNode plan_node;
|
||||
auto vector_anns = plan_node.mutable_vector_anns();
|
||||
vector_anns->set_vector_type(milvus::proto::plan::VectorType::FloatVector);
|
||||
vector_anns->set_placeholder_tag("$0");
|
||||
vector_anns->set_field_id(vec_field_id.get());
|
||||
auto query_info = vector_anns->mutable_query_info();
|
||||
query_info->set_topk(topk);
|
||||
query_info->set_round_decimal(3);
|
||||
query_info->set_metric_type(metricType);
|
||||
query_info->set_search_params(R"({"nprobe": 1024})");
|
||||
auto plan_str = plan_node.SerializeAsString();
|
||||
|
||||
auto ph_group_raw =
|
||||
CreatePlaceholderGroupFromBlob(num_queries, data_d, query_ptr.get());
|
||||
|
||||
auto plan = milvus::query::CreateSearchPlanByExpr(
|
||||
*schema, plan_str.data(), plan_str.size());
|
||||
auto ph_group =
|
||||
ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
|
||||
|
||||
std::vector<const milvus::query::PlaceholderGroup*> ph_group_arr = {
|
||||
ph_group.get()};
|
||||
auto nlist = segcore_config.get_nlist();
|
||||
auto binlog_index_sr = segment->Search(plan.get(), ph_group.get());
|
||||
ASSERT_EQ(binlog_index_sr->total_nq_, num_queries);
|
||||
EXPECT_EQ(binlog_index_sr->unity_topK_, topk);
|
||||
EXPECT_EQ(binlog_index_sr->distances_.size(), num_queries * topk);
|
||||
EXPECT_EQ(binlog_index_sr->seg_offsets_.size(), num_queries * topk);
|
||||
|
||||
// 3. update vector index
|
||||
{
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
||||
create_index_info.metric_type = metricType;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
|
||||
create_index_info.index_engine_version =
|
||||
knowhere::Version::GetCurrentVersion().VersionNumber();
|
||||
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, milvus::storage::FileManagerContext());
|
||||
|
||||
auto build_conf =
|
||||
knowhere::Json{{knowhere::meta::METRIC_TYPE, metricType},
|
||||
{knowhere::meta::DIM, std::to_string(data_d)},
|
||||
{knowhere::indexparam::NLIST, "1024"}};
|
||||
|
||||
auto database = knowhere::GenDataSet(data_n, data_d, vec_data.get());
|
||||
indexing->BuildWithDataset(database, build_conf);
|
||||
|
||||
LoadIndexInfo load_info;
|
||||
load_info.field_id = vec_field_id.get();
|
||||
|
||||
load_info.index = std::move(indexing);
|
||||
load_info.index_params["metric_type"] = metricType;
|
||||
segment->DropFieldData(vec_field_id);
|
||||
ASSERT_NO_THROW(segment->LoadIndex(load_info));
|
||||
EXPECT_TRUE(segment->HasIndex(vec_field_id));
|
||||
EXPECT_EQ(segment->get_row_count(), data_n);
|
||||
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
|
||||
auto ivf_sr = segment->Search(plan.get(), ph_group.get());
|
||||
auto similary = GetKnnSearchRecall(num_queries,
|
||||
binlog_index_sr->seg_offsets_.data(),
|
||||
topk,
|
||||
ivf_sr->seg_offsets_.data(),
|
||||
topk);
|
||||
ASSERT_GT(similary, 0.45);
|
||||
}
|
||||
}
|
||||
|
||||
TEST_P(BinlogIndexTest, DisableInterimIndex) {
|
||||
IndexMetaPtr collection_index_meta =
|
||||
GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IVFFLAT);
|
||||
|
||||
segment = CreateSealedSegment(schema, collection_index_meta);
|
||||
LoadOtherFields();
|
||||
SegcoreSetEnableTempSegmentIndex(false);
|
||||
|
||||
auto field_data_info =
|
||||
FieldDataInfo{vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<storage::FieldDataPtr>{vec_field_data}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
|
||||
EXPECT_FALSE(segment->HasIndex(vec_field_id));
|
||||
EXPECT_EQ(segment->get_row_count(), data_n);
|
||||
EXPECT_TRUE(segment->HasFieldData(vec_field_id));
|
||||
// load vector index
|
||||
milvus::index::CreateIndexInfo create_index_info;
|
||||
create_index_info.field_type = DataType::VECTOR_FLOAT;
|
||||
create_index_info.metric_type = metricType;
|
||||
create_index_info.index_type = knowhere::IndexEnum::INDEX_FAISS_IVFFLAT;
|
||||
create_index_info.index_engine_version =
|
||||
knowhere::Version::GetCurrentVersion().VersionNumber();
|
||||
auto indexing = milvus::index::IndexFactory::GetInstance().CreateIndex(
|
||||
create_index_info, milvus::storage::FileManagerContext());
|
||||
|
||||
auto build_conf =
|
||||
knowhere::Json{{knowhere::meta::METRIC_TYPE, metricType},
|
||||
{knowhere::meta::DIM, std::to_string(data_d)},
|
||||
{knowhere::indexparam::NLIST, "1024"}};
|
||||
|
||||
auto database = knowhere::GenDataSet(data_n, data_d, vec_data.get());
|
||||
indexing->BuildWithDataset(database, build_conf);
|
||||
|
||||
LoadIndexInfo load_info;
|
||||
load_info.field_id = vec_field_id.get();
|
||||
|
||||
load_info.index = std::move(indexing);
|
||||
load_info.index_params["metric_type"] = metricType;
|
||||
|
||||
segment->DropFieldData(vec_field_id);
|
||||
ASSERT_NO_THROW(segment->LoadIndex(load_info));
|
||||
EXPECT_TRUE(segment->HasIndex(vec_field_id));
|
||||
EXPECT_EQ(segment->get_row_count(), data_n);
|
||||
EXPECT_FALSE(segment->HasFieldData(vec_field_id));
|
||||
}
|
||||
|
||||
TEST_P(BinlogIndexTest, LoadBingLogWihIDMAP) {
|
||||
IndexMetaPtr collection_index_meta =
|
||||
GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IDMAP);
|
||||
|
||||
segment = CreateSealedSegment(schema, collection_index_meta);
|
||||
LoadOtherFields();
|
||||
|
||||
auto field_data_info =
|
||||
FieldDataInfo{vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<storage::FieldDataPtr>{vec_field_data}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
|
||||
EXPECT_FALSE(segment->HasIndex(vec_field_id));
|
||||
EXPECT_EQ(segment->get_row_count(), data_n);
|
||||
EXPECT_TRUE(segment->HasFieldData(vec_field_id));
|
||||
}
|
||||
|
||||
TEST_P(BinlogIndexTest, LoadBinlogWithoutIndexMeta) {
|
||||
IndexMetaPtr collection_index_meta =
|
||||
GetCollectionIndexMeta(knowhere::IndexEnum::INDEX_FAISS_IDMAP);
|
||||
|
||||
segment = CreateSealedSegment(schema, collection_index_meta);
|
||||
SegcoreSetEnableTempSegmentIndex(true);
|
||||
|
||||
auto field_data_info =
|
||||
FieldDataInfo{vec_field_id.get(),
|
||||
data_n,
|
||||
std::vector<storage::FieldDataPtr>{vec_field_data}};
|
||||
segment->LoadFieldData(vec_field_id, field_data_info);
|
||||
|
||||
EXPECT_FALSE(segment->HasIndex(vec_field_id));
|
||||
EXPECT_EQ(segment->get_row_count(), data_n);
|
||||
EXPECT_TRUE(segment->HasFieldData(vec_field_id));
|
||||
}
|
@ -177,7 +177,7 @@ TEST(Float16, GetVector) {
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
config.set_chunk_rows(1024);
|
||||
config.set_enable_growing_segment_index(true);
|
||||
config.set_enable_interim_segment_index(true);
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {{vec, fieldIndexMeta}};
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(100000, std::move(filedMap));
|
||||
|
@ -133,7 +133,7 @@ TEST(Growing, FillData) {
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
config.set_chunk_rows(1024);
|
||||
config.set_enable_growing_segment_index(true);
|
||||
config.set_enable_interim_segment_index(true);
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {{vec, fieldIndexMeta}};
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(100000, std::move(filedMap));
|
||||
|
@ -37,7 +37,7 @@ TEST(GrowingIndex, Correctness) {
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
config.set_chunk_rows(1024);
|
||||
config.set_enable_growing_segment_index(true);
|
||||
config.set_enable_interim_segment_index(true);
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {{vec, fieldIndexMeta}};
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(226985, std::move(filedMap));
|
||||
@ -161,7 +161,7 @@ TEST_P(GrowingIndexGetVectorTest, GetVector) {
|
||||
vec, std::move(index_params), std::move(type_params));
|
||||
auto& config = SegcoreConfig::default_config();
|
||||
config.set_chunk_rows(1024);
|
||||
config.set_enable_growing_segment_index(true);
|
||||
config.set_enable_interim_segment_index(true);
|
||||
std::map<FieldId, FieldIndexMeta> filedMap = {{vec, fieldIndexMeta}};
|
||||
IndexMetaPtr metaPtr =
|
||||
std::make_shared<CollectionIndexMeta>(100000, std::move(filedMap));
|
||||
|
@ -237,7 +237,6 @@ func (BinaryExpr_BinaryOp) EnumDescriptor() ([]byte, []int) {
|
||||
|
||||
type GenericValue struct {
|
||||
// Types that are valid to be assigned to Val:
|
||||
//
|
||||
// *GenericValue_BoolVal
|
||||
// *GenericValue_Int64Val
|
||||
// *GenericValue_FloatVal
|
||||
@ -1298,7 +1297,6 @@ var xxx_messageInfo_AlwaysTrueExpr proto.InternalMessageInfo
|
||||
|
||||
type Expr struct {
|
||||
// Types that are valid to be assigned to Expr:
|
||||
//
|
||||
// *Expr_TermExpr
|
||||
// *Expr_UnaryExpr
|
||||
// *Expr_BinaryExpr
|
||||
@ -1670,7 +1668,6 @@ func (m *QueryPlanNode) GetLimit() int64 {
|
||||
|
||||
type PlanNode struct {
|
||||
// Types that are valid to be assigned to Node:
|
||||
//
|
||||
// *PlanNode_VectorAnns
|
||||
// *PlanNode_Predicates
|
||||
// *PlanNode_Query
|
||||
|
@ -29,9 +29,8 @@ const _ = proto.ProtoPackageIsVersion3 // please upgrade the proto package
|
||||
|
||||
type InvalidateCollMetaCacheRequest struct {
|
||||
// MsgType:
|
||||
//
|
||||
// DropCollection -> {meta cache, dml channels}
|
||||
// Other -> {meta cache}
|
||||
// DropCollection -> {meta cache, dml channels}
|
||||
// Other -> {meta cache}
|
||||
Base *commonpb.MsgBase `protobuf:"bytes,1,opt,name=base,proto3" json:"base,omitempty"`
|
||||
DbName string `protobuf:"bytes,2,opt,name=db_name,json=dbName,proto3" json:"db_name,omitempty"`
|
||||
CollectionName string `protobuf:"bytes,3,opt,name=collection_name,json=collectionName,proto3" json:"collection_name,omitempty"`
|
||||
|
@ -793,28 +793,28 @@ type RootCoordClient interface {
|
||||
GetComponentStates(ctx context.Context, in *milvuspb.GetComponentStatesRequest, opts ...grpc.CallOption) (*milvuspb.ComponentStates, error)
|
||||
GetTimeTickChannel(ctx context.Context, in *internalpb.GetTimeTickChannelRequest, opts ...grpc.CallOption) (*milvuspb.StringResponse, error)
|
||||
GetStatisticsChannel(ctx context.Context, in *internalpb.GetStatisticsChannelRequest, opts ...grpc.CallOption) (*milvuspb.StringResponse, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to create collection
|
||||
//
|
||||
// @param CreateCollectionRequest, use to provide collection information to be created.
|
||||
//
|
||||
// @return Status
|
||||
CreateCollection(ctx context.Context, in *milvuspb.CreateCollectionRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to delete collection.
|
||||
//
|
||||
// @param DropCollectionRequest, collection name is going to be deleted.
|
||||
//
|
||||
// @return Status
|
||||
DropCollection(ctx context.Context, in *milvuspb.DropCollectionRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to test collection existence.
|
||||
//
|
||||
// @param HasCollectionRequest, collection name is going to be tested.
|
||||
//
|
||||
// @return BoolResponse
|
||||
HasCollection(ctx context.Context, in *milvuspb.HasCollectionRequest, opts ...grpc.CallOption) (*milvuspb.BoolResponse, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to get collection schema.
|
||||
//
|
||||
// @param DescribeCollectionRequest, target collection name.
|
||||
@ -825,28 +825,28 @@ type RootCoordClient interface {
|
||||
CreateAlias(ctx context.Context, in *milvuspb.CreateAliasRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
DropAlias(ctx context.Context, in *milvuspb.DropAliasRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
AlterAlias(ctx context.Context, in *milvuspb.AlterAliasRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to list all collections.
|
||||
//
|
||||
// @return StringListResponse, collection name list
|
||||
ShowCollections(ctx context.Context, in *milvuspb.ShowCollectionsRequest, opts ...grpc.CallOption) (*milvuspb.ShowCollectionsResponse, error)
|
||||
AlterCollection(ctx context.Context, in *milvuspb.AlterCollectionRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to create partition
|
||||
//
|
||||
// @return Status
|
||||
CreatePartition(ctx context.Context, in *milvuspb.CreatePartitionRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to drop partition
|
||||
//
|
||||
// @return Status
|
||||
DropPartition(ctx context.Context, in *milvuspb.DropPartitionRequest, opts ...grpc.CallOption) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to test partition existence.
|
||||
//
|
||||
// @return BoolResponse
|
||||
HasPartition(ctx context.Context, in *milvuspb.HasPartitionRequest, opts ...grpc.CallOption) (*milvuspb.BoolResponse, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to show partition information
|
||||
//
|
||||
// @param ShowPartitionRequest, target collection name.
|
||||
@ -854,7 +854,7 @@ type RootCoordClient interface {
|
||||
// @return StringListResponse
|
||||
ShowPartitions(ctx context.Context, in *milvuspb.ShowPartitionsRequest, opts ...grpc.CallOption) (*milvuspb.ShowPartitionsResponse, error)
|
||||
ShowPartitionsInternal(ctx context.Context, in *milvuspb.ShowPartitionsRequest, opts ...grpc.CallOption) (*milvuspb.ShowPartitionsResponse, error)
|
||||
// rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {}
|
||||
// rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {}
|
||||
ShowSegments(ctx context.Context, in *milvuspb.ShowSegmentsRequest, opts ...grpc.CallOption) (*milvuspb.ShowSegmentsResponse, error)
|
||||
AllocTimestamp(ctx context.Context, in *AllocTimestampRequest, opts ...grpc.CallOption) (*AllocTimestampResponse, error)
|
||||
AllocID(ctx context.Context, in *AllocIDRequest, opts ...grpc.CallOption) (*AllocIDResponse, error)
|
||||
@ -1327,28 +1327,28 @@ type RootCoordServer interface {
|
||||
GetComponentStates(context.Context, *milvuspb.GetComponentStatesRequest) (*milvuspb.ComponentStates, error)
|
||||
GetTimeTickChannel(context.Context, *internalpb.GetTimeTickChannelRequest) (*milvuspb.StringResponse, error)
|
||||
GetStatisticsChannel(context.Context, *internalpb.GetStatisticsChannelRequest) (*milvuspb.StringResponse, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to create collection
|
||||
//
|
||||
// @param CreateCollectionRequest, use to provide collection information to be created.
|
||||
//
|
||||
// @return Status
|
||||
CreateCollection(context.Context, *milvuspb.CreateCollectionRequest) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to delete collection.
|
||||
//
|
||||
// @param DropCollectionRequest, collection name is going to be deleted.
|
||||
//
|
||||
// @return Status
|
||||
DropCollection(context.Context, *milvuspb.DropCollectionRequest) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to test collection existence.
|
||||
//
|
||||
// @param HasCollectionRequest, collection name is going to be tested.
|
||||
//
|
||||
// @return BoolResponse
|
||||
HasCollection(context.Context, *milvuspb.HasCollectionRequest) (*milvuspb.BoolResponse, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to get collection schema.
|
||||
//
|
||||
// @param DescribeCollectionRequest, target collection name.
|
||||
@ -1359,28 +1359,28 @@ type RootCoordServer interface {
|
||||
CreateAlias(context.Context, *milvuspb.CreateAliasRequest) (*commonpb.Status, error)
|
||||
DropAlias(context.Context, *milvuspb.DropAliasRequest) (*commonpb.Status, error)
|
||||
AlterAlias(context.Context, *milvuspb.AlterAliasRequest) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to list all collections.
|
||||
//
|
||||
// @return StringListResponse, collection name list
|
||||
ShowCollections(context.Context, *milvuspb.ShowCollectionsRequest) (*milvuspb.ShowCollectionsResponse, error)
|
||||
AlterCollection(context.Context, *milvuspb.AlterCollectionRequest) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to create partition
|
||||
//
|
||||
// @return Status
|
||||
CreatePartition(context.Context, *milvuspb.CreatePartitionRequest) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to drop partition
|
||||
//
|
||||
// @return Status
|
||||
DropPartition(context.Context, *milvuspb.DropPartitionRequest) (*commonpb.Status, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to test partition existence.
|
||||
//
|
||||
// @return BoolResponse
|
||||
HasPartition(context.Context, *milvuspb.HasPartitionRequest) (*milvuspb.BoolResponse, error)
|
||||
// *
|
||||
//*
|
||||
// @brief This method is used to show partition information
|
||||
//
|
||||
// @param ShowPartitionRequest, target collection name.
|
||||
@ -1388,7 +1388,7 @@ type RootCoordServer interface {
|
||||
// @return StringListResponse
|
||||
ShowPartitions(context.Context, *milvuspb.ShowPartitionsRequest) (*milvuspb.ShowPartitionsResponse, error)
|
||||
ShowPartitionsInternal(context.Context, *milvuspb.ShowPartitionsRequest) (*milvuspb.ShowPartitionsResponse, error)
|
||||
// rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {}
|
||||
// rpc DescribeSegment(milvus.DescribeSegmentRequest) returns (milvus.DescribeSegmentResponse) {}
|
||||
ShowSegments(context.Context, *milvuspb.ShowSegmentsRequest) (*milvuspb.ShowSegmentsResponse, error)
|
||||
AllocTimestamp(context.Context, *AllocTimestampRequest) (*AllocTimestampResponse, error)
|
||||
AllocID(context.Context, *AllocIDRequest) (*AllocIDResponse, error)
|
||||
|
@ -193,13 +193,13 @@ func (node *QueryNode) InitSegcore() error {
|
||||
cKnowhereThreadPoolSize := C.uint32_t(paramtable.Get().QueryNodeCfg.KnowhereThreadPoolSize.GetAsUint32())
|
||||
C.SegcoreSetKnowhereSearchThreadPoolNum(cKnowhereThreadPoolSize)
|
||||
|
||||
enableGrowingIndex := C.bool(paramtable.Get().QueryNodeCfg.EnableGrowingSegmentIndex.GetAsBool())
|
||||
C.SegcoreSetEnableGrowingSegmentIndex(enableGrowingIndex)
|
||||
enableGrowingIndex := C.bool(paramtable.Get().QueryNodeCfg.EnableTempSegmentIndex.GetAsBool())
|
||||
C.SegcoreSetEnableTempSegmentIndex(enableGrowingIndex)
|
||||
|
||||
nlist := C.int64_t(paramtable.Get().QueryNodeCfg.GrowingIndexNlist.GetAsInt64())
|
||||
nlist := C.int64_t(paramtable.Get().QueryNodeCfg.InterimIndexNlist.GetAsInt64())
|
||||
C.SegcoreSetNlist(nlist)
|
||||
|
||||
nprobe := C.int64_t(paramtable.Get().QueryNodeCfg.GrowingIndexNProbe.GetAsInt64())
|
||||
nprobe := C.int64_t(paramtable.Get().QueryNodeCfg.InterimIndexNProbe.GetAsInt64())
|
||||
C.SegcoreSetNprobe(nprobe)
|
||||
|
||||
// override segcore SIMD type
|
||||
|
@ -1541,11 +1541,11 @@ type queryNodeConfig struct {
|
||||
StatsPublishInterval ParamItem `refreshable:"true"`
|
||||
|
||||
// segcore
|
||||
KnowhereThreadPoolSize ParamItem `refreshable:"false"`
|
||||
ChunkRows ParamItem `refreshable:"false"`
|
||||
EnableGrowingSegmentIndex ParamItem `refreshable:"false"`
|
||||
GrowingIndexNlist ParamItem `refreshable:"false"`
|
||||
GrowingIndexNProbe ParamItem `refreshable:"false"`
|
||||
KnowhereThreadPoolSize ParamItem `refreshable:"false"`
|
||||
ChunkRows ParamItem `refreshable:"false"`
|
||||
EnableTempSegmentIndex ParamItem `refreshable:"false"`
|
||||
InterimIndexNlist ParamItem `refreshable:"false"`
|
||||
InterimIndexNProbe ParamItem `refreshable:"false"`
|
||||
|
||||
// memory limit
|
||||
LoadMemoryUsageFactor ParamItem `refreshable:"true"`
|
||||
@ -1666,42 +1666,42 @@ func (p *queryNodeConfig) init(base *BaseTable) {
|
||||
}
|
||||
p.ChunkRows.Init(base.mgr)
|
||||
|
||||
p.EnableGrowingSegmentIndex = ParamItem{
|
||||
Key: "queryNode.segcore.growing.enableIndex",
|
||||
p.EnableTempSegmentIndex = ParamItem{
|
||||
Key: "queryNode.segcore.interimIndex.enableIndex",
|
||||
Version: "2.0.0",
|
||||
DefaultValue: "false",
|
||||
Doc: "Enable segment growing with index to accelerate vector search.",
|
||||
Doc: "Enable segment build with index to accelerate vector search when segment is in growing or binlog.",
|
||||
Export: true,
|
||||
}
|
||||
p.EnableGrowingSegmentIndex.Init(base.mgr)
|
||||
p.EnableTempSegmentIndex.Init(base.mgr)
|
||||
|
||||
p.GrowingIndexNlist = ParamItem{
|
||||
Key: "queryNode.segcore.growing.nlist",
|
||||
p.InterimIndexNlist = ParamItem{
|
||||
Key: "queryNode.segcore.interimIndex.nlist",
|
||||
Version: "2.0.0",
|
||||
DefaultValue: "128",
|
||||
Doc: "growing index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8",
|
||||
Doc: "temp index nlist, recommend to set sqrt(chunkRows), must smaller than chunkRows/8",
|
||||
Export: true,
|
||||
}
|
||||
p.GrowingIndexNlist.Init(base.mgr)
|
||||
p.InterimIndexNlist.Init(base.mgr)
|
||||
|
||||
p.GrowingIndexNProbe = ParamItem{
|
||||
Key: "queryNode.segcore.growing.nprobe",
|
||||
p.InterimIndexNProbe = ParamItem{
|
||||
Key: "queryNode.segcore.interimIndex.nprobe",
|
||||
Version: "2.0.0",
|
||||
Formatter: func(v string) string {
|
||||
defaultNprobe := p.GrowingIndexNlist.GetAsInt64() / 8
|
||||
defaultNprobe := p.InterimIndexNlist.GetAsInt64() / 8
|
||||
nprobe := getAsInt64(v)
|
||||
if nprobe == 0 {
|
||||
nprobe = defaultNprobe
|
||||
}
|
||||
if nprobe > p.GrowingIndexNlist.GetAsInt64() {
|
||||
return p.GrowingIndexNlist.GetValue()
|
||||
if nprobe > p.InterimIndexNlist.GetAsInt64() {
|
||||
return p.InterimIndexNlist.GetValue()
|
||||
}
|
||||
return strconv.FormatInt(nprobe, 10)
|
||||
},
|
||||
Doc: "nprobe to search small index, based on your accuracy requirement, must smaller than nlist",
|
||||
Export: true,
|
||||
}
|
||||
p.GrowingIndexNProbe.Init(base.mgr)
|
||||
p.InterimIndexNProbe.Init(base.mgr)
|
||||
|
||||
p.LoadMemoryUsageFactor = ParamItem{
|
||||
Key: "queryNode.loadMemoryUsageFactor",
|
||||
|
@ -297,10 +297,10 @@ func TestComponentParam(t *testing.T) {
|
||||
chunkRows := Params.ChunkRows.GetAsInt64()
|
||||
assert.Equal(t, int64(1024), chunkRows)
|
||||
|
||||
nlist := Params.GrowingIndexNlist.GetAsInt64()
|
||||
nlist := Params.InterimIndexNlist.GetAsInt64()
|
||||
assert.Equal(t, int64(128), nlist)
|
||||
|
||||
nprobe := Params.GrowingIndexNProbe.GetAsInt64()
|
||||
nprobe := Params.InterimIndexNProbe.GetAsInt64()
|
||||
assert.Equal(t, int64(16), nprobe)
|
||||
|
||||
assert.Equal(t, true, Params.GroupEnabled.GetAsBool())
|
||||
@ -319,17 +319,17 @@ func TestComponentParam(t *testing.T) {
|
||||
chunkRows = Params.ChunkRows.GetAsInt64()
|
||||
assert.Equal(t, int64(8192), chunkRows)
|
||||
|
||||
enableGrowingIndex := Params.EnableGrowingSegmentIndex.GetAsBool()
|
||||
assert.Equal(t, true, enableGrowingIndex)
|
||||
enableInterimIndex := Params.EnableTempSegmentIndex.GetAsBool()
|
||||
assert.Equal(t, true, enableInterimIndex)
|
||||
|
||||
params.Save("queryNode.segcore.growing.enableIndex", "true")
|
||||
enableGrowingIndex = Params.EnableGrowingSegmentIndex.GetAsBool()
|
||||
assert.Equal(t, true, enableGrowingIndex)
|
||||
params.Save("queryNode.segcore.interimIndex.enableIndex", "true")
|
||||
enableInterimIndex = Params.EnableTempSegmentIndex.GetAsBool()
|
||||
assert.Equal(t, true, enableInterimIndex)
|
||||
|
||||
nlist = Params.GrowingIndexNlist.GetAsInt64()
|
||||
nlist = Params.InterimIndexNlist.GetAsInt64()
|
||||
assert.Equal(t, int64(128), nlist)
|
||||
|
||||
nprobe = Params.GrowingIndexNProbe.GetAsInt64()
|
||||
nprobe = Params.InterimIndexNProbe.GetAsInt64()
|
||||
assert.Equal(t, int64(16), nprobe)
|
||||
|
||||
params.Remove("queryNode.segcore.growing.nlist")
|
||||
|
@ -2991,7 +2991,7 @@ class TestCollectionSearch(TestcaseBase):
|
||||
filter_ids.append(_id)
|
||||
|
||||
# 2. create index
|
||||
index_param = {"index_type": "IVF_FLAT", "metric_type": "COSINE", "params": {"nlist": 100}}
|
||||
index_param = {"index_type": "FLAT", "metric_type": "COSINE", "params": {}}
|
||||
collection_w.create_index("float_vector", index_param)
|
||||
collection_w.load()
|
||||
|
||||
@ -7381,7 +7381,7 @@ class TestCollectionRangeSearch(TestcaseBase):
|
||||
filter_ids.append(_id)
|
||||
|
||||
# 2. create index
|
||||
index_param = {"index_type": "IVF_FLAT",
|
||||
index_param = {"index_type": "FLAT",
|
||||
"metric_type": "L2", "params": {"nlist": 100}}
|
||||
collection_w.create_index("float_vector", index_param)
|
||||
collection_w.load()
|
||||
|
Loading…
Reference in New Issue
Block a user