Improve segcore (#7851)

Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
This commit is contained in:
Cai Yudong 2021-09-14 10:53:04 +08:00 committed by GitHub
parent 1e935d33ed
commit 74ac528095
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 10 additions and 11 deletions

View File

@ -13,7 +13,8 @@
namespace milvus::segcore {
InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk) : uids_(1), timestamps_(1) {
InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk)
: uids_(size_per_chunk), timestamps_(size_per_chunk) {
for (auto& field : schema) {
if (field.is_vector()) {
if (field.get_data_type() == DataType::VECTOR_FLOAT) {

View File

@ -140,7 +140,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
}
std::sort(ordering.begin(), ordering.end());
// step 3: and convert row-base data to column base accordingly
// step 3: and convert row-based data to column-based data accordingly
auto sizeof_infos = schema_->get_sizeof_infos();
std::vector<int> offset_infos(schema_->size() + 1, 0);
std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1);
@ -161,7 +161,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
for (int fid = 0; fid < schema_->size(); ++fid) {
auto len = sizeof_infos[fid];
auto offset = offset_infos[fid];
auto src = raw_data + offset + order_index * len_per_row;
auto src = raw_data + order_index * len_per_row + offset;
auto dst = entities[fid].data() + index * len;
memcpy(dst, src, len);
}

View File

@ -18,14 +18,11 @@ using namespace milvus::segcore;
// Smoke test: builds a two-field schema (a binary vector field and an INT32
// field), generates N rows of data for it, and inserts them into a growing
// segment. The visible body contains no assertions.
//
// NOTE(review): this listing appears to be a rendered diff hunk without +/-
// markers, so pre-change and post-change statements are both shown below --
// confirm against the real file before treating every line as live code.
TEST(Binary, Insert) {
// Number of rows to generate and insert.
int64_t N = 100000;
// Not referenced anywhere else in the visible test body.
int64_t num_queries = 10;
// Not referenced anywhere else in the visible test body.
int64_t topK = 5;
auto schema = std::make_shared<Schema>();
// Binary vector field using the Jaccard metric; 128 is presumably the vector dimension -- TODO confirm.
schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard);
schema->AddDebugField("age", DataType::INT32);
// The third argument's meaning is not visible here (presumably a random seed -- verify against DataGen).
auto dataset = DataGen(schema, N, 10);
auto segment = CreateGrowingSegment(schema);
// Variant that ignores PreInsert's return value and passes a hard-coded 0 as the first Insert argument.
segment->PreInsert(N);
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
// Not referenced anywhere else in the visible test body.
int i = 1 + 1;
// Variant that passes the value returned by PreInsert as the first Insert argument.
auto offset = segment->PreInsert(N);
segment->Insert(offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
}

View File

@ -30,7 +30,7 @@ using boost::algorithm::starts_with;
namespace milvus::segcore {
struct GeneratedData {
std::vector<char> rows_;
std::vector<uint8_t> rows_;
std::vector<aligned_vector<uint8_t>> cols_;
std::vector<idx_t> row_ids_;
std::vector<Timestamp> timestamps_;
@ -68,13 +68,14 @@ GeneratedData::generate_rows(int64_t N, SchemaPtr schema) {
int64_t len_per_row = offset_infos.back();
assert(len_per_row == schema->get_total_sizeof());
std::vector<char> result(len_per_row * N);
// change column-based data to row-based data
std::vector<uint8_t> result(len_per_row * N);
for (int index = 0; index < N; ++index) {
for (int fid = 0; fid < schema->size(); ++fid) {
auto len = sizeof_infos[fid];
auto offset = offset_infos[fid];
auto src = cols_[fid].data() + index * len;
auto dst = result.data() + offset + index * len_per_row;
auto dst = result.data() + index * len_per_row + offset;
memcpy(dst, src, len);
}
}