mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-02 03:48:37 +08:00
Improve segcore (#7851)
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
This commit is contained in:
parent
1e935d33ed
commit
74ac528095
@ -13,7 +13,8 @@
|
||||
|
||||
namespace milvus::segcore {
|
||||
|
||||
InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk) : uids_(1), timestamps_(1) {
|
||||
InsertRecord::InsertRecord(const Schema& schema, int64_t size_per_chunk)
|
||||
: uids_(size_per_chunk), timestamps_(size_per_chunk) {
|
||||
for (auto& field : schema) {
|
||||
if (field.is_vector()) {
|
||||
if (field.get_data_type() == DataType::VECTOR_FLOAT) {
|
||||
|
@ -140,7 +140,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
|
||||
}
|
||||
std::sort(ordering.begin(), ordering.end());
|
||||
|
||||
// step 3: and convert row-base data to column base accordingly
|
||||
// step 3: and convert row-based data to column-based data accordingly
|
||||
auto sizeof_infos = schema_->get_sizeof_infos();
|
||||
std::vector<int> offset_infos(schema_->size() + 1, 0);
|
||||
std::partial_sum(sizeof_infos.begin(), sizeof_infos.end(), offset_infos.begin() + 1);
|
||||
@ -161,7 +161,7 @@ SegmentGrowingImpl::Insert(int64_t reserved_begin,
|
||||
for (int fid = 0; fid < schema_->size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
auto offset = offset_infos[fid];
|
||||
auto src = raw_data + offset + order_index * len_per_row;
|
||||
auto src = raw_data + order_index * len_per_row + offset;
|
||||
auto dst = entities[fid].data() + index * len;
|
||||
memcpy(dst, src, len);
|
||||
}
|
||||
|
@ -18,14 +18,11 @@ using namespace milvus::segcore;
|
||||
|
||||
TEST(Binary, Insert) {
|
||||
int64_t N = 100000;
|
||||
int64_t num_queries = 10;
|
||||
int64_t topK = 5;
|
||||
auto schema = std::make_shared<Schema>();
|
||||
schema->AddDebugField("vecbin", DataType::VECTOR_BINARY, 128, MetricType::METRIC_Jaccard);
|
||||
schema->AddDebugField("age", DataType::INT32);
|
||||
auto dataset = DataGen(schema, N, 10);
|
||||
auto segment = CreateGrowingSegment(schema);
|
||||
segment->PreInsert(N);
|
||||
segment->Insert(0, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
int i = 1 + 1;
|
||||
auto offset = segment->PreInsert(N);
|
||||
segment->Insert(offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), dataset.raw_);
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ using boost::algorithm::starts_with;
|
||||
namespace milvus::segcore {
|
||||
|
||||
struct GeneratedData {
|
||||
std::vector<char> rows_;
|
||||
std::vector<uint8_t> rows_;
|
||||
std::vector<aligned_vector<uint8_t>> cols_;
|
||||
std::vector<idx_t> row_ids_;
|
||||
std::vector<Timestamp> timestamps_;
|
||||
@ -68,13 +68,14 @@ GeneratedData::generate_rows(int64_t N, SchemaPtr schema) {
|
||||
int64_t len_per_row = offset_infos.back();
|
||||
assert(len_per_row == schema->get_total_sizeof());
|
||||
|
||||
std::vector<char> result(len_per_row * N);
|
||||
// change column-based data to row-based data
|
||||
std::vector<uint8_t> result(len_per_row * N);
|
||||
for (int index = 0; index < N; ++index) {
|
||||
for (int fid = 0; fid < schema->size(); ++fid) {
|
||||
auto len = sizeof_infos[fid];
|
||||
auto offset = offset_infos[fid];
|
||||
auto src = cols_[fid].data() + index * len;
|
||||
auto dst = result.data() + offset + index * len_per_row;
|
||||
auto dst = result.data() + index * len_per_row + offset;
|
||||
memcpy(dst, src, len);
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user