milvus/internal/core/bench/bench_search.cpp
FluorineDog ef98dab2a9 Support segcore config
Signed-off-by: FluorineDog <guilin.gou@zilliz.com>
2021-03-04 17:09:48 +08:00

121 lines
3.8 KiB
C++

// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License
#include <cstdint>
#include <benchmark/benchmark.h>
#include <string>
#include "segcore/SegmentGrowing.h"
#include "segcore/SegmentSealed.h"
#include "test_utils/DataGen.h"
using namespace milvus;
using namespace milvus::query;
using namespace milvus::segcore;
// Dimension of the "fakevec" vector field and of the generated query vectors.
static int dim = 128;
// Row count handed to DataGen and inserted into the benchmarked segments (1 Mi rows).
static int64_t N = 1 * 1024 * 1024;
// Schema shared by every benchmark in this file: a single float-vector field
// named "fakevec" with `dim` dimensions, registered with the L2 metric.
// Built once during static initialization.
const auto schema = [] {
    auto built = std::make_shared<Schema>();
    built->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_L2);
    return built;
}();
// Data set generated once from `schema` with N rows; both benchmarks below
// read their input (columns, row ids, timestamps) from it.
const auto dataset_ = [] { return DataGen(schema, N); }();
// Query plan shared by every benchmark, created once from the JSON DSL below:
// one vector clause on "fakevec" with metric L2, nprobe 4 and topk 5.
// NOTE(review): "$0" presumably names the placeholder that ph_group binds the
// query vectors to -- confirm against the DSL parser.
const auto plan = [] {
    std::string dsl_text = R"({
"bool": {
"must": [
{
"vector": {
"fakevec": {
"metric_type": "L2",
"params": {
"nprobe": 4
},
"query": "$0",
"topk": 5
}
}
}
]
}
})";
    return CreatePlan(*schema, dsl_text);
}();
// Placeholder group shared by every benchmark: 5 query vectors of `dim`
// dimensions, serialized and then parsed against `plan`.
// NOTE(review): the third CreatePlaceholderGroup argument (1024) looks like a
// random seed -- confirm against test_utils/DataGen.h.
auto ph_group = [] {
    const int query_count = 5;
    auto raw_group = CreatePlaceholderGroup(query_count, dim, 1024);
    return ParsePlaceholderGroup(plan.get(), raw_group.SerializeAsString());
}();
// Benchmarks Search() on a growing segment that holds the whole shared data set.
//   state.range(0): non-zero leaves the segment as created; zero calls
//                   debug_disable_small_index() before any data is inserted.
//   state.range(1): multiplied by 1024 and passed to set_size_per_chunk().
// The segment is built and filled before the `for (auto _ : state)` loop, so
// only the Search() call in the loop body is timed.
static void
Search_SmallIndex(benchmark::State& state) {
    const auto use_small_index = state.range(0);
    const auto size_per_chunk = state.range(1) * 1024;

    // Work on a copy of the default config so the chunk size is per-run.
    auto config = SegcoreConfig::default_config();
    config.set_size_per_chunk(size_per_chunk);

    auto growing = CreateGrowingSegment(schema, config);
    if (!use_small_index) {
        growing->debug_disable_small_index();
    }

    // Reserve N rows, then insert the entire generated data set at offset 0.
    growing->PreInsert(N);
    ColumnBasedRawData columns;
    columns.columns_ = dataset_.cols_;
    columns.count = N;
    growing->Insert(0, N, dataset_.row_ids_.data(), dataset_.timestamps_.data(), columns);

    Timestamp search_ts = 10000000;
    std::vector<const PlaceholderGroup*> groups = {ph_group.get()};

    for (auto _ : state) {
        auto result = growing->Search(plan.get(), groups.data(), &search_ts, 1);
    }
}
// Every combination of {small index on, off} x chunk factor {8, 16, 32, 64, 128},
// each run with a minimum time of 5.
BENCHMARK(Search_SmallIndex)->MinTime(5)->ArgsProduct({{true, false}, {8, 16, 32, 64, 128}});
// Benchmarks Search() on a sealed segment loaded with the shared data set.
//   state.range(0) == 1: an index is built over the first data column with
//                        GenIndexing and loaded with IVF / CPU / L2 parameters.
//   any other value:     no index is loaded (the registered value 0 is the
//                        "brute force" case).
// All setup happens before the `for (auto _ : state)` loop, so only the
// Search() call in the loop body is timed.
static void
Search_Sealed(benchmark::State& state) {
    auto sealed = CreateSealedSegment(schema);
    SealedLoader(dataset_, *sealed);

    const bool use_ivf_index = state.range(0) == 1;
    if (use_ivf_index) {
        // The first column's storage is reinterpreted as the float vectors to index.
        const auto* vectors = reinterpret_cast<const float*>(dataset_.cols_[0].data());
        auto built_index = GenIndexing(N, dim, vectors);

        LoadIndexInfo load_info;
        load_info.index = built_index;
        load_info.index_params["index_type"] = "IVF";
        load_info.index_params["index_mode"] = "CPU";
        load_info.index_params["metric_type"] = MetricTypeToName(MetricType::METRIC_L2);
        sealed->LoadIndex(load_info);
    }

    Timestamp search_ts = 10000000;
    std::vector<const PlaceholderGroup*> groups = {ph_group.get()};

    for (auto _ : state) {
        auto result = sealed->Search(plan.get(), groups.data(), &search_ts, 1);
    }
}
// Runs the indexed variant (1) first, then the index-free variant (0), each
// with a minimum time of 5.
BENCHMARK(Search_Sealed)->MinTime(5)->Arg(1)->Arg(0);