2020-08-26 16:49:03 +08:00
|
|
|
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the License
|
|
|
|
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
|
|
|
|
// or implied. See the License for the specific language governing permissions and limitations under the License.
|
|
|
|
|
2020-09-15 10:00:00 +08:00
|
|
|
#include <gtest/gtest.h>
|
2021-09-22 17:19:53 +08:00
|
|
|
#include <random>
|
2020-08-26 16:49:03 +08:00
|
|
|
#include <string>
|
2022-09-23 17:08:51 +08:00
|
|
|
#include <iostream>
|
2021-10-18 18:38:47 +08:00
|
|
|
|
2021-11-19 17:23:12 +08:00
|
|
|
#include "segcore/SegmentGrowingImpl.h"
|
2022-04-29 13:35:49 +08:00
|
|
|
#include "test_utils/DataGen.h"
|
2021-09-22 17:19:53 +08:00
|
|
|
|
2020-11-16 15:41:56 +08:00
|
|
|
// Bring the milvus namespace into scope for the rest of this file.
using namespace milvus;
|
2020-08-26 16:49:03 +08:00
|
|
|
|
2020-10-15 21:31:50 +08:00
|
|
|
namespace {
// Fixed segment id (101), local to this translation unit.
static constexpr int64_t seg_id = 101;

/// Builds N rows of deterministic synthetic data.
///
/// Every row is appended to the raw byte buffer as 16 floats drawn from a
/// normal distribution (mean 0.0, stddev 1.0) followed by one int "age" in
/// [0, 99]. Both random engines are seeded with 42, so two calls with the
/// same N produce the same bytes on a given standard library.
///
/// @param N number of rows to generate; N <= 0 yields three empty vectors.
/// @return tuple of (raw row bytes, timestamps, uids), where every timestamp
///         is 0 and the uid of row i is 10 * N + i.
auto
generate_data(int N) {
    constexpr int kDim = 16;
    constexpr auto kRowBytes = kDim * sizeof(float) + sizeof(int);

    // Fill the returned tuple in place: returning `result` by name avoids the
    // three vector copies that std::make_tuple(lvalue, ...) would perform.
    std::tuple<std::vector<char>, std::vector<uint64_t>, std::vector<int64_t>>
        result;
    auto& [raw_data, timestamps, uids] = result;

    // The final sizes are known up front, so allocate once.
    const auto row_count =
        static_cast<std::vector<char>::size_type>(N > 0 ? N : 0);
    raw_data.reserve(row_count * kRowBytes);
    timestamps.reserve(row_count);
    uids.reserve(row_count);

    std::default_random_engine er(42);
    std::normal_distribution<> distribution(0.0, 1.0);
    std::default_random_engine ei(42);

    for (int i = 0; i < N; ++i) {
        // Widen before multiplying so large N cannot overflow int.
        uids.push_back(int64_t{10} * N + i);
        timestamps.push_back(0);

        // append vec
        float vec[kDim];
        for (auto& x : vec) {
            x = static_cast<float>(distribution(er));
        }
        raw_data.insert(raw_data.end(),
                        reinterpret_cast<const char*>(std::begin(vec)),
                        reinterpret_cast<const char*>(std::end(vec)));

        // append age
        const int age = static_cast<int>(ei() % 100);
        const auto* age_bytes = reinterpret_cast<const char*>(&age);
        raw_data.insert(raw_data.end(), age_bytes, age_bytes + sizeof(age));
    }
    return result;
}
}  // namespace
|
2020-08-26 16:49:03 +08:00
|
|
|
|
2020-10-31 15:11:47 +08:00
|
|
|
// Smoke test: generates synthetic rows with generate_data() and reserves room
// for the same number of rows in a growing segment. It makes no assertions.
TEST(SegmentCoreTest, NormalDistributionTest) {
    using namespace milvus::segcore;

    constexpr int kRowCount = 100 * 1000;

    // Schema with a float-vector field "fakevec" and an INT32 field "age".
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    schema->AddDebugField("age", DataType::INT32);

    // The generated buffers are produced but not inserted by this test.
    auto [row_bytes, row_timestamps, row_ids] = generate_data(kRowCount);

    auto growing = CreateGrowingSegment(schema, empty_index_meta);
    growing->PreInsert(kRowCount);
}
|
|
|
|
|
2022-03-10 16:33:59 +08:00
|
|
|
// Test insert column-based data
|
|
|
|
TEST(SegmentCoreTest, MockTest2) {
|
2022-03-21 14:23:24 +08:00
|
|
|
using namespace milvus::segcore;
|
|
|
|
|
|
|
|
// schema
|
|
|
|
auto schema = std::make_shared<Schema>();
|
2023-03-10 09:47:54 +08:00
|
|
|
schema->AddDebugField(
|
|
|
|
"fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
|
2022-04-29 13:35:49 +08:00
|
|
|
auto i64_fid = schema->AddDebugField("age", DataType::INT64);
|
|
|
|
schema->set_primary_field_id(i64_fid);
|
2022-03-21 14:23:24 +08:00
|
|
|
|
|
|
|
int N = 10000; // number of records
|
2022-04-29 13:35:49 +08:00
|
|
|
auto dataset = DataGen(schema, N);
|
2023-04-26 10:14:41 +08:00
|
|
|
auto segment = CreateGrowingSegment(schema, empty_index_meta);
|
2022-03-21 14:23:24 +08:00
|
|
|
auto reserved_begin = segment->PreInsert(N);
|
2023-03-10 09:47:54 +08:00
|
|
|
segment->Insert(reserved_begin,
|
|
|
|
N,
|
|
|
|
dataset.row_ids_.data(),
|
|
|
|
dataset.timestamps_.data(),
|
|
|
|
dataset.raw_);
|
2022-03-10 16:33:59 +08:00
|
|
|
}
|
|
|
|
|
2020-10-31 15:11:47 +08:00
|
|
|
// Only checks that a two-field schema can be assembled: a float-vector field
// "fakevec" followed by an INT32 field "age". It makes no assertions.
TEST(SegmentCoreTest, SmallIndex) {
    using namespace milvus::segcore;

    auto two_field_schema = std::make_shared<Schema>();

    two_field_schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);

    two_field_schema->AddDebugField("age", DataType::INT32);
}
|
2022-09-16 10:52:47 +08:00
|
|
|
|
2022-09-23 17:08:51 +08:00
|
|
|
// Inserts int64 primary keys 1..N into an InsertRecord<false> and checks that
// search_pk() finds each of them again with the value it was inserted with.
TEST(InsertRecordTest, growing_int64_t) {
    using namespace milvus::segcore;

    // Schema with a float-vector field and an INT64 primary field "age".
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto i64_fid = schema->AddDebugField("age", DataType::INT64);
    schema->set_primary_field_id(i64_fid);

    auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
    const int N = 100000;

    // Key i is stored together with the value i, in ascending order.
    for (int i = 1; i <= N; i++) {
        record.insert_pk(PkType(int64_t(i)), int64_t(i));
    }

    for (int i = 1; i <= N; i++) {
        std::vector<SegOffset> offset =
            record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
        // Report a missing key as a test failure rather than indexing into
        // an empty vector, which would be undefined behaviour.
        ASSERT_FALSE(offset.empty()) << "no offset found for pk " << i;
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}
|
|
|
|
|
|
|
|
// Inserts string primary keys "1".."N" into an InsertRecord<false> and checks
// that search_pk() finds each of them again with the value it was inserted
// with.
TEST(InsertRecordTest, growing_string) {
    using namespace milvus::segcore;

    // Schema with a float-vector field and a VARCHAR primary field "name".
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto str_fid = schema->AddDebugField("name", DataType::VARCHAR);
    schema->set_primary_field_id(str_fid);

    auto record = milvus::segcore::InsertRecord<false>(*schema, int64_t(32));
    const int N = 100000;

    // Key to_string(i) is stored together with the value i.
    for (int i = 1; i <= N; i++) {
        record.insert_pk(PkType(std::to_string(i)), int64_t(i));
    }

    for (int i = 1; i <= N; i++) {
        std::vector<SegOffset> offset =
            record.search_pk(PkType(std::to_string(i)), int64_t(N + 1));
        // Report a missing key as a test failure rather than indexing into
        // an empty vector, which would be undefined behaviour.
        ASSERT_FALSE(offset.empty()) << "no offset found for pk " << i;
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}
|
|
|
|
|
2022-09-23 17:08:51 +08:00
|
|
|
// Inserts int64 primary keys N..1 into an InsertRecord<true>, calls
// seal_pks(), and checks that search_pk() finds each key again with the value
// it was inserted with.
TEST(InsertRecordTest, sealed_int64_t) {
    using namespace milvus::segcore;

    // Schema with a float-vector field and an INT64 primary field "age".
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto i64_fid = schema->AddDebugField("age", DataType::INT64);
    schema->set_primary_field_id(i64_fid);

    auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
    const int N = 100000;

    // Keys go in from largest to smallest, so they are not inserted in
    // ascending order before seal_pks() runs.
    for (int i = N; i >= 1; i--) {
        record.insert_pk(PkType(int64_t(i)), int64_t(i));
    }
    record.seal_pks();

    for (int i = 1; i <= N; i++) {
        std::vector<SegOffset> offset =
            record.search_pk(PkType(int64_t(i)), int64_t(N + 1));
        // Report a missing key as a test failure rather than indexing into
        // an empty vector, which would be undefined behaviour.
        ASSERT_FALSE(offset.empty()) << "no offset found for pk " << i;
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}
|
|
|
|
|
2022-09-23 17:08:51 +08:00
|
|
|
// Inserts string primary keys "1".."N" into an InsertRecord<true>, calls
// seal_pks(), and checks that search_pk() finds each key again with the value
// it was inserted with.
TEST(InsertRecordTest, sealed_string) {
    using namespace milvus::segcore;

    // Schema with a float-vector field and a VARCHAR primary field "name".
    auto schema = std::make_shared<Schema>();
    schema->AddDebugField(
        "fakevec", DataType::VECTOR_FLOAT, 16, knowhere::metric::L2);
    auto str_fid = schema->AddDebugField("name", DataType::VARCHAR);
    schema->set_primary_field_id(str_fid);

    auto record = milvus::segcore::InsertRecord<true>(*schema, int64_t(32));
    const int N = 100000;

    // Key to_string(i) is stored together with the value i.
    for (int i = 1; i <= N; i++) {
        record.insert_pk(PkType(std::to_string(i)), int64_t(i));
    }
    record.seal_pks();

    for (int i = 1; i <= N; i++) {
        std::vector<SegOffset> offset =
            record.search_pk(PkType(std::to_string(i)), int64_t(N + 1));
        // Report a missing key as a test failure rather than indexing into
        // an empty vector, which would be undefined behaviour.
        ASSERT_FALSE(offset.empty()) << "no offset found for pk " << i;
        ASSERT_EQ(offset[0].get(), int64_t(i));
    }
}
|