// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.

2020-11-03 11:45:48 +08:00
|
|
|
#include <gtest/gtest.h>
|
2020-12-12 18:17:53 +08:00
|
|
|
#include "query/deprecated/ParserDeprecated.h"
|
2020-11-05 14:30:52 +08:00
|
|
|
#include "query/Expr.h"
|
|
|
|
#include "query/PlanNode.h"
|
|
|
|
#include "query/generated/ExprVisitor.h"
|
|
|
|
#include "query/generated/PlanNodeVisitor.h"
|
2020-11-06 15:34:39 +08:00
|
|
|
#include "test_utils/DataGen.h"
|
|
|
|
#include "query/generated/ShowPlanNodeVisitor.h"
|
2020-11-16 10:55:49 +08:00
|
|
|
#include "query/generated/ExecPlanNodeVisitor.h"
|
|
|
|
#include "query/PlanImpl.h"
|
2021-01-13 18:46:25 +08:00
|
|
|
#include "segcore/SegmentGrowingImpl.h"
|
2021-01-21 15:29:52 +08:00
|
|
|
#include "segcore/SegmentSealed.h"
|
2020-11-28 19:06:48 +08:00
|
|
|
#include "pb/schema.pb.h"
|
2020-11-03 11:45:48 +08:00
|
|
|
|
2020-11-16 15:41:56 +08:00
|
|
|
using namespace milvus;
|
|
|
|
using namespace milvus::query;
|
|
|
|
using namespace milvus::segcore;
|
2020-11-06 15:34:39 +08:00
|
|
|
|
|
|
|
// Hand-builds a FloatVectorANNS plan node (no predicate) and prints the JSON
// produced for it by ShowPlanNodeVisitor. The output is for visual inspection
// only; nothing is asserted.
TEST(Query, ShowExecutor) {
    using namespace milvus::query;
    using namespace milvus::segcore;
    using namespace milvus;

    auto anns_node = std::make_unique<FloatVectorANNS>();

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    int64_t num_queries = 100L;
    auto raw_data = DataGen(schema, num_queries);

    // Fill in the search parameters directly on the node.
    anns_node->search_info_.metric_type_ = MetricType::METRIC_L2;
    anns_node->search_info_.topk_ = 20;
    anns_node->search_info_.field_offset_ = FieldOffset(1000);
    anns_node->predicate_ = std::nullopt;

    // Hand ownership of the concrete node to the generic plan-node pointer.
    PlanNodePtr plan_root(anns_node.release());

    ShowPlanNodeVisitor visitor;
    auto rendered = visitor.call_child(*plan_root);
    std::cout << rendered.dump(4);
}
|
|
|
|
|
|
|
|
// Parses two spellings of the same vector query -- one wrapping the "vector"
// clause in a "must" array, one placing it directly under "bool" -- and checks
// that ShowPlanNodeVisitor renders both plans to the same JSON.
TEST(Query, DSL) {
    using namespace milvus::query;
    using namespace milvus::segcore;

    // Variant 1: vector clause inside "must".
    std::string dsl_with_must = R"(
{
    "bool": {
        "must": [
            {
                "vector": {
                    "fakevec": {
                        "metric_type": "L2",
                        "params": {
                            "nprobe": 10
                        },
                        "query": "$0",
                        "topk": 10
                    }
                }
            }
        ]
    }
})";

    // Variant 2: vector clause directly under "bool".
    std::string dsl_without_must = R"(
{
    "bool": {
        "vector": {
            "fakevec": {
                "metric_type": "L2",
                "params": {
                    "nprobe": 10
                },
                "query": "$0",
                "topk": 10
            }
        }
    }
})";

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);

    ShowPlanNodeVisitor shower;

    auto first_plan = CreatePlan(*schema, dsl_with_must);
    auto first_shown = shower.call_child(*first_plan->plan_node_);
    std::cout << first_shown.dump(4) << std::endl;

    auto second_plan = CreatePlan(*schema, dsl_without_must);
    auto second_shown = shower.call_child(*second_plan->plan_node_);
    std::cout << second_shown.dump(4) << std::endl;

    ASSERT_EQ(first_shown, second_shown);
}
|
|
|
|
|
|
|
|
// Serializes a generated placeholder group (100000 query vectors of dimension
// 16) and feeds the blob back through ParsePlaceholderGroup against a plan
// built from a plain vector-search DSL.
TEST(Query, ParsePlaceholderGroup) {
    const std::string dsl_string = R"(
{
    "bool": {
        "vector": {
            "fakevec": {
                "metric_type": "L2",
                "params": {
                    "nprobe": 10
                },
                "query": "$0",
                "topk": 10
            }
        }
    }
})";

    const int64_t num_queries = 100000;
    const int dim = 16;

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_L2);
    auto plan = CreatePlan(*schema, dsl_string);

    auto raw_group = CreatePlaceholderGroup(num_queries, dim);
    auto blob = raw_group.SerializeAsString();

    auto placeholder = ParsePlaceholderGroup(plan.get(), blob);
    // The parsed group is dereferenced by every search test in this file, so a
    // successful parse must hand back a usable object.
    ASSERT_TRUE(placeholder != nullptr);
}
|
|
|
|
|
2021-02-07 15:47:10 +08:00
|
|
|
// Range-filtered L2 search (-1 <= age < 1, topk 5) over one million generated
// rows that are inserted through a ColumnBasedRawData built from the
// generator's column data. The JSON rendering of the result is compared with a
// fixed reference.
TEST(Query, ExecWithPredicateLoader) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "range": {
                        "age": {
                            "GE": -1,
                            "LT": 1
                        }
                    }
                },
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);

    // Insert column-wise instead of using dataset.raw_.
    ColumnBasedRawData raw_data;
    raw_data.columns_ = dataset.cols_;
    raw_data.count = N;
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), raw_data);

    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);

    Json actual = SearchResultToJson(sr);
    auto ref = json::parse(R"(
[
    [
        [
            "982->0.000000",
            "25315->4.741588",
            "57893->4.758279",
            "551029->5.078479",
            "455002->5.134716"
        ],
        [
            "528353->8.740297",
            "659305->8.802286",
            "935763->9.422906",
            "794649->9.436665",
            "192031->9.832053"
        ],
        [
            "980439->3.342777",
            "433044->3.424016",
            "797884->3.663446",
            "697705->3.944479",
            "186546->4.404788"
        ],
        [
            "642941->3.753775",
            "967504->3.885163",
            "764517->4.364819",
            "332938->4.418214",
            "232724->4.574215"
        ],
        [
            "351788->4.453843",
            "410227->4.699380",
            "501497->4.805948",
            "715061->5.166959",
            "414882->5.179897"
        ]
    ]
])");
    std::cout << actual.dump(2);
    ASSERT_EQ(actual.dump(2), ref.dump(2));
}
|
|
|
|
|
2021-03-22 16:36:10 +08:00
|
|
|
// Range-filtered L2 search on a small growing segment (177 rows, 7-dimensional
// vectors). The result is printed, and its query count and topk are checked
// against the values requested by the test.
TEST(Query, ExecWithPredicateSmallN) {
    // One constant for the vector width so the schema and the generated query
    // vectors cannot drift apart.
    constexpr int dim = 7;
    // Must match "topk" in the DSL below.
    constexpr int topk = 5;

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "range": {
                        "age": {
                            "GE": -1,
                            "LT": 1
                        }
                    }
                },
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    int64_t N = 177;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, dim, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);

    Json json = SearchResultToJson(sr);
    std::cout << json.dump(2);

    ASSERT_EQ(sr.num_queries_, num_queries);
    ASSERT_EQ(sr.topk_, topk);
}
|
|
|
|
|
2020-11-24 19:09:57 +08:00
|
|
|
// Range-filtered L2 search (-1 <= age < 1, topk 5) over one million generated
// rows in a growing segment. The JSON rendering of the result is compared with
// a fixed reference.
TEST(Query, ExecWithPredicate) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "range": {
                        "age": {
                            "GE": -1,
                            "LT": 1
                        }
                    }
                },
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);

    Json actual = SearchResultToJson(sr);
    auto ref = json::parse(R"(
[
    [
        [
            "982->0.000000",
            "25315->4.741588",
            "57893->4.758279",
            "551029->5.078479",
            "455002->5.134716"
        ],
        [
            "528353->8.740297",
            "659305->8.802286",
            "935763->9.422906",
            "794649->9.436665",
            "192031->9.832053"
        ],
        [
            "980439->3.342777",
            "433044->3.424016",
            "797884->3.663446",
            "697705->3.944479",
            "186546->4.404788"
        ],
        [
            "642941->3.753775",
            "967504->3.885163",
            "764517->4.364819",
            "332938->4.418214",
            "232724->4.574215"
        ],
        [
            "351788->4.453843",
            "410227->4.699380",
            "501497->4.805948",
            "715061->5.166959",
            "414882->5.179897"
        ]
    ]
])");
    std::cout << actual.dump(2);
    ASSERT_EQ(actual.dump(2), ref.dump(2));
}
|
2021-01-21 15:29:52 +08:00
|
|
|
|
2020-12-10 20:13:37 +08:00
|
|
|
// L2 search combined with a "term" predicate on "age" whose value list is
// empty. Checks that the result reports the requested number of queries and
// the requested topk.
TEST(Query, ExecTerm) {
    // Must match "topk" in the DSL below.
    constexpr int topk = 5;

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "term": {
                        "age": {
                            "values": []
                        }
                    }
                },
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 3;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);

    // The JSON itself is not inspected; the call is kept so the conversion is
    // still exercised on this result.
    static_cast<void>(SearchResultToJson(sr));

    ASSERT_EQ(sr.num_queries_, num_queries);
    ASSERT_EQ(sr.topk_, topk);
}
|
2020-11-24 19:09:57 +08:00
|
|
|
|
2021-01-07 09:32:17 +08:00
|
|
|
// Searches a growing segment into which nothing was inserted. Every returned
// segment offset must be -1 and every returned distance must be the largest
// finite float.
TEST(Query, ExecEmpty) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("age", DataType::FLOAT);
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    // No PreInsert/Insert on purpose: the segment stays empty.
    auto segment = CreateGrowingSegment(schema);
    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);
    std::cout << SearchResultToJson(sr);

    for (auto offset : sr.internal_seg_offsets_) {
        ASSERT_EQ(offset, -1);
    }

    for (auto distance : sr.result_distances_) {
        ASSERT_EQ(distance, std::numeric_limits<float>::max());
    }
}
|
|
|
|
|
2021-02-27 12:46:37 +08:00
|
|
|
// Unfiltered L2 search over one million rows where the vector field is
// declared with no metric type (std::nullopt) while the DSL requests "L2".
// The result is printed, and its query count and topk are checked.
TEST(Query, ExecWithoutPredicateFlat) {
    // Must match "topk" in the DSL below.
    constexpr int topk = 5;

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, std::nullopt);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";
    auto plan = CreatePlan(*schema, dsl);

    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);

    auto json = SearchResultToJson(sr);
    std::cout << json.dump(2);

    ASSERT_EQ(sr.num_queries_, num_queries);
    ASSERT_EQ(sr.topk_, topk);
}
|
|
|
|
|
2020-12-08 18:51:07 +08:00
|
|
|
// Unfiltered L2 search (topk 5, five queries) over one million generated rows
// in a growing segment. The JSON rendering of the result is compared with a
// fixed reference.
TEST(Query, ExecWithoutPredicate) {
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_FLOAT, 16, MetricType::METRIC_L2);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";
    auto plan = CreatePlan(*schema, dsl);

    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);

    auto num_queries = 5;
    auto ph_group_raw = CreatePlaceholderGroup(num_queries, 16, 1024);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);

    auto actual = SearchResultToJson(sr);
    auto ref = json::parse(R"(
[
    [
        [
            "982->0.000000",
            "25315->4.741588",
            "57893->4.758279",
            "694663->4.980466",
            "551029->5.078479"
        ],
        [
            "559507->7.956653",
            "871836->8.694542",
            "528353->8.740297",
            "659305->8.802286",
            "516137->8.935913"
        ],
        [
            "980439->3.342777",
            "433044->3.424016",
            "527556->3.487235",
            "797884->3.663446",
            "814805->3.782786"
        ],
        [
            "642941->3.753775",
            "967504->3.885163",
            "177960->4.339530",
            "764517->4.364819",
            "841079->4.403300"
        ],
        [
            "688614->4.259011",
            "351788->4.453843",
            "452698->4.473838",
            "410227->4.699380",
            "501497->4.805948"
        ]
    ]
]
)");
    std::cout << actual.dump(2);
    ASSERT_EQ(actual.dump(2), ref.dump(2));
}
|
2020-11-28 19:06:48 +08:00
|
|
|
|
2021-01-07 09:32:17 +08:00
|
|
|
// Inner-product search on a growing segment of 100000 rows. The first
// `num_queries` generated vectors of column 0 are reused as the query vectors.
// The result is printed, and its query count and topk are checked.
TEST(Indexing, InnerProduct) {
    int64_t N = 100000;
    constexpr auto dim = 16;
    // Kept in sync with "topk" in the DSL below (previously declared as 10 and
    // never used).
    constexpr auto topk = 5;
    auto num_queries = 5;
    auto schema = std::make_shared<Schema>();
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "normalized": {
                            "metric_type": "IP",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";
    schema->AddDebugField("normalized", DataType::VECTOR_FLOAT, dim, MetricType::METRIC_INNER_PRODUCT);
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    auto plan = CreatePlan(*schema, dsl);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
    auto col = dataset.get_col<float>(0);

    // Use `dim` rather than a repeated literal so the query width always
    // follows the schema.
    auto ph_group_raw = CreatePlaceholderGroupFromBlob(num_queries, dim, col.data());
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp ts = N * 2;

    auto sr = segment->Search(plan.get(), *ph_group, ts);
    std::cout << SearchResultToJson(sr).dump(2);

    ASSERT_EQ(sr.num_queries_, num_queries);
    ASSERT_EQ(sr.topk_, topk);
}
|
|
|
|
|
2020-11-28 19:06:48 +08:00
|
|
|
// Builds a three-field schema from a protobuf description (float vector,
// int64 primary key, int32 value), loads the same generated data into a
// growing and a sealed segment, searches each, and checks that
// FillTargetEntry packs every row as
//   [int64 key][dim floats of "fakevec"][int32 "the_value"]
// with values equal to the generated columns at the reported segment offset.
TEST(Query, FillSegment) {
    namespace pb = milvus::proto;
    pb::schema::CollectionSchema proto;
    proto.set_name("col");
    proto.set_description("asdfhsalkgfhsadg");
    proto.set_autoid(false);
    auto dim = 16;
    auto topk = 5;  // must match "topk" in the DSL below
    auto num_queries = 10;

    {
        auto field = proto.add_fields();
        field->set_name("fakevec");
        field->set_is_primary_key(false);
        field->set_description("asdgfsagf");
        field->set_fieldid(100);
        field->set_data_type(pb::schema::DataType::FloatVector);
        auto param = field->add_type_params();
        param->set_key("dim");
        param->set_value("16");  // textual form of `dim`; keep the two in sync
        auto iparam = field->add_index_params();
        iparam->set_key("metric_type");
        iparam->set_value("L2");
    }

    {
        auto field = proto.add_fields();
        field->set_name("the_key");
        field->set_fieldid(101);
        field->set_is_primary_key(true);
        field->set_description("asdgfsagf");
        field->set_data_type(pb::schema::DataType::Int64);
    }

    {
        auto field = proto.add_fields();
        field->set_name("the_value");
        field->set_fieldid(102);
        field->set_is_primary_key(false);
        field->set_description("asdgfsagf");
        field->set_data_type(pb::schema::DataType::Int32);
    }

    auto schema = Schema::ParseFrom(proto);

    int N = 100000;
    auto dataset = DataGen(schema, N);
    // Reference copies of the generated columns, indexed by field order.
    const auto std_vec = dataset.get_col<int64_t>(1);
    const auto std_vfloat_vec = dataset.get_col<float>(0);
    const auto std_i32_vec = dataset.get_col<int32_t>(2);

    std::vector<std::unique_ptr<SegmentInternalInterface>> segments;
    segments.emplace_back([&] {
        auto segment = CreateGrowingSegment(schema);
        segment->PreInsert(N);
        segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
        return segment;
    }());
    segments.emplace_back([&] {
        auto segment = CreateSealedSegment(schema);
        SealedLoader(dataset, *segment);
        // Index loading is currently disabled:
        // auto indexing = GenIndexing(N, dim, std_vfloat_vec.data());
        //
        // LoadIndexInfo info;
        // auto field_offset = schema->get_offset(FieldName("fakevec"));
        // auto& meta = schema->operator[](field_offset);
        //
        // info.field_id = meta.get_id().get();
        // info.field_name = meta.get_name().get();
        // info.index_params["metric_type"] = "L2";
        // info.index = indexing;
        //
        // segment->LoadIndex(info);
        return segment;
    }());

    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "L2",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";
    auto plan = CreatePlan(*schema, dsl);
    auto ph_proto = CreatePlaceholderGroup(num_queries, dim, 443);
    auto ph = ParsePlaceholderGroup(plan.get(), ph_proto.SerializeAsString());
    Timestamp ts = N * 2UL;

    for (auto& segment : segments) {
        plan->target_entries_.clear();
        plan->target_entries_.push_back(schema->get_offset(FieldName("fakevec")));
        plan->target_entries_.push_back(schema->get_offset(FieldName("the_value")));
        SearchResult result = segment->Search(plan.get(), *ph, ts);
        // std::cout << SearchResultToJson(result).dump(2);
        result.result_offsets_.resize(topk * num_queries);
        segment->FillTargetEntry(plan.get(), result);

        // Bind by reference: the rows are only read, so no copy is needed.
        const auto& ans = result.row_data_;
        ASSERT_EQ(ans.size(), topk * num_queries);
        int64_t std_index = 0;

        for (const auto& vec : ans) {
            ASSERT_EQ(vec.size(), sizeof(int64_t) + sizeof(float) * dim + sizeof(int32_t));
            int64_t val;
            memcpy(&val, vec.data(), sizeof(int64_t));

            auto internal_offset = result.internal_seg_offsets_[std_index];
            auto std_val = std_vec[internal_offset];
            auto std_i32 = std_i32_vec[internal_offset];
            std::vector<float> std_vfloat(dim);
            std::copy_n(std_vfloat_vec.begin() + dim * internal_offset, dim, std_vfloat.begin());

            ASSERT_EQ(val, std_val) << "io:" << internal_offset;
            if (val != -1) {
                std::vector<float> vfloat(dim);
                // Fixed-width type: exactly sizeof(int32_t) bytes are copied in.
                int32_t i32;
                memcpy(vfloat.data(), vec.data() + sizeof(int64_t), dim * sizeof(float));
                memcpy(&i32, vec.data() + sizeof(int64_t) + dim * sizeof(float), sizeof(int32_t));
                ASSERT_EQ(vfloat, std_vfloat) << std_index;
                ASSERT_EQ(i32, std_i32) << std_index;
            }
            ++std_index;
        }
    }
}
|
2020-12-08 18:51:07 +08:00
|
|
|
|
|
|
|
// Range-filtered Jaccard search on 512-bit binary vectors over one million
// generated rows. The query vectors are taken from the generated vector column
// itself. The result is printed, and its query count and topk are checked.
TEST(Query, ExecWithPredicateBinary) {
    constexpr int dim = 512;  // vector width in bits
    // Must match "topk" in the DSL below.
    constexpr int topk = 5;
    // Row of the generated column at which the query vectors start; each row
    // occupies dim / 8 bytes.
    constexpr int first_query_row = 1024;

    auto schema = std::make_shared<Schema>();
    schema->AddDebugField("fakevec", DataType::VECTOR_BINARY, dim, MetricType::METRIC_Jaccard);
    schema->AddDebugField("age", DataType::FLOAT);
    std::string dsl = R"({
        "bool": {
            "must": [
                {
                    "range": {
                        "age": {
                            "GE": -1,
                            "LT": 1
                        }
                    }
                },
                {
                    "vector": {
                        "fakevec": {
                            "metric_type": "Jaccard",
                            "params": {
                                "nprobe": 10
                            },
                            "query": "$0",
                            "topk": 5
                        }
                    }
                }
            ]
        }
    })";

    int64_t N = 1000 * 1000;
    auto dataset = DataGen(schema, N);
    auto segment = CreateGrowingSegment(schema);
    segment->PreInsert(N);
    segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
    auto binary_vectors = dataset.get_col<uint8_t>(0);

    auto plan = CreatePlan(*schema, dsl);
    auto num_queries = 5;
    auto ph_group_raw =
        CreateBinaryPlaceholderGroupFromBlob(num_queries, dim, binary_vectors.data() + first_query_row * dim / 8);
    auto ph_group = ParsePlaceholderGroup(plan.get(), ph_group_raw.SerializeAsString());
    Timestamp time = 1000000;

    auto sr = segment->Search(plan.get(), *ph_group, time);

    Json json = SearchResultToJson(sr);
    std::cout << json.dump(2);

    // TODO: no reference JSON exists for this scenario yet, so only the shape
    // of the result is verified.
    ASSERT_EQ(sr.num_queries_, num_queries);
    ASSERT_EQ(sr.topk_, topk);
}
|