Replace text codec of pb with binary codec (#16955)

Signed-off-by: dragondriver <jiquan.long@zilliz.com>
This commit is contained in:
Jiquan Long 2022-05-13 09:53:53 +08:00 committed by GitHub
parent 63385b7c12
commit c5149c7e85
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 147 additions and 209 deletions

View File

@ -70,13 +70,15 @@ typedef struct CProto {
// Describes one field's data handed (by value) to LoadFieldData.
// NOTE(review): this span comes from a rendered diff; the `const char*` and
// `const uint8_t*` blob lines look like the old and new declaration of the
// same member -- confirm against the real header.
typedef struct CLoadFieldDataInfo {
int64_t field_id;
const char* blob;
const uint8_t* blob;  // serialized field data; LoadFieldData parses it with ParseFromArray
uint64_t blob_size;   // passed as the size argument when parsing blob
int64_t row_count;
} CLoadFieldDataInfo;
// Describes deleted records handed (by value) to LoadDeletedRecord.
// NOTE(review): this span comes from a rendered diff; the `const char*` and
// `const uint8_t*` primary_keys lines look like the old and new declaration of
// the same member -- confirm against the real header.
typedef struct CLoadDeletedRecordInfo {
void* timestamps;
const char* primary_keys;
const uint8_t* primary_keys;  // serialized IDs message; LoadDeletedRecord parses it with ParseFromArray
// Passed as the size argument when parsing primary_keys.
// NOTE(review): declared const, unlike blob_size in CLoadFieldDataInfo -- confirm this is intentional.
const uint64_t primary_keys_size;
int64_t row_count;
} CLoadDeletedRecordInfo;

View File

@ -134,14 +134,13 @@ Insert(CSegmentInterface c_segment,
int64_t size,
const int64_t* row_ids,
const uint64_t* timestamps,
const char* data_info) {
const uint8_t* data_info,
const uint64_t data_info_len) {
try {
auto segment = (milvus::segcore::SegmentGrowing*)c_segment;
auto proto = std::string(data_info);
Assert(!proto.empty());
auto insert_data = std::make_unique<milvus::InsertData>();
auto suc = google::protobuf::TextFormat::ParseFromString(proto, insert_data.get());
AssertInfo(suc, "unmarshal field data string failed");
auto suc = insert_data->ParseFromArray(data_info, data_info_len);
AssertInfo(suc, "failed to parse insert data from records");
segment->Insert(reserved_offset, size, row_ids, timestamps, insert_data.get());
return milvus::SuccessCStatus();
@ -162,14 +161,16 @@ PreInsert(CSegmentInterface c_segment, int64_t size, int64_t* offset) {
}
CStatus
Delete(
CSegmentInterface c_segment, int64_t reserved_offset, int64_t size, const char* ids, const uint64_t* timestamps) {
Delete(CSegmentInterface c_segment,
int64_t reserved_offset,
int64_t size,
const uint8_t* ids,
const uint64_t ids_size,
const uint64_t* timestamps) {
auto segment = (milvus::segcore::SegmentInterface*)c_segment;
auto proto = std::string(ids);
Assert(!proto.empty());
auto pks = std::make_unique<milvus::proto::schema::IDs>();
auto suc = google::protobuf::TextFormat::ParseFromString(proto, pks.get());
AssertInfo(suc, "unmarshal field data string failed");
auto suc = pks->ParseFromArray(ids, ids_size);
AssertInfo(suc, "failed to parse pks from ids");
try {
auto res = segment->Delete(reserved_offset, size, pks.get(), timestamps);
return milvus::SuccessCStatus();
@ -192,10 +193,8 @@ LoadFieldData(CSegmentInterface c_segment, CLoadFieldDataInfo load_field_data_in
auto segment_interface = reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
auto segment = dynamic_cast<milvus::segcore::SegmentSealed*>(segment_interface);
AssertInfo(segment != nullptr, "segment conversion failed");
auto proto = std::string(load_field_data_info.blob);
Assert(!proto.empty());
auto field_data = std::make_unique<milvus::DataArray>();
auto suc = google::protobuf::TextFormat::ParseFromString(proto, field_data.get());
auto suc = field_data->ParseFromArray(load_field_data_info.blob, load_field_data_info.blob_size);
AssertInfo(suc, "unmarshal field data string failed");
auto load_info =
LoadFieldDataInfo{load_field_data_info.field_id, field_data.get(), load_field_data_info.row_count};
@ -211,10 +210,8 @@ LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_re
try {
auto segment_interface = reinterpret_cast<milvus::segcore::SegmentInterface*>(c_segment);
AssertInfo(segment_interface != nullptr, "segment conversion failed");
auto proto = std::string(deleted_record_info.primary_keys);
Assert(!proto.empty());
auto pks = std::make_unique<milvus::proto::schema::IDs>();
auto suc = google::protobuf::TextFormat::ParseFromString(proto, pks.get());
auto suc = pks->ParseFromArray(deleted_record_info.primary_keys, deleted_record_info.primary_keys_size);
AssertInfo(suc, "unmarshal field data string failed");
auto load_info =
LoadDeletedRecordInfo{deleted_record_info.timestamps, pks.get(), deleted_record_info.row_count};

View File

@ -67,7 +67,8 @@ Insert(CSegmentInterface c_segment,
int64_t size,
const int64_t* row_ids,
const uint64_t* timestamps,
const char* data_info);
const uint8_t* data_info,
const uint64_t data_info_len);
CStatus
PreInsert(CSegmentInterface c_segment, int64_t size, int64_t* offset);
@ -90,7 +91,12 @@ DropSealedSegmentIndex(CSegmentInterface c_segment, int64_t field_id);
////////////////////////////// interfaces for SegmentInterface //////////////////////////////
CStatus
Delete(CSegmentInterface c_segment, int64_t reserved_offset, int64_t size, const char* ids, const uint64_t* timestamps);
Delete(CSegmentInterface c_segment,
int64_t reserved_offset,
int64_t size,
const uint8_t* ids,
const uint64_t ids_size,
const uint64_t* timestamps);
int64_t
PreDelete(CSegmentInterface c_segment, int64_t size);

View File

@ -198,6 +198,16 @@ TEST(CApiTest, SegmentTest) {
DeleteSegment(segment);
}
// Test helper: encodes `msg` into a newly allocated byte vector.
// The vector is sized from msg->ByteSize() and filled by msg->SerializeToArray();
// the result of SerializeToArray is checked with assert only.
template <typename Message>
std::vector<uint8_t>
serialize(const Message* msg) {
    const auto byte_count = msg->ByteSize();
    std::vector<uint8_t> buffer(byte_count);
    const auto serialized = msg->SerializeToArray(buffer.data(), byte_count);
    assert(serialized);
    return buffer;
}
TEST(CApiTest, InsertTest) {
auto c_collection = NewCollection(get_default_schema_config());
auto segment = NewSegment(c_collection, Growing, -1);
@ -209,10 +219,8 @@ TEST(CApiTest, InsertTest) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(res.error_code == Success);
DeleteCollection(c_collection);
@ -226,14 +234,12 @@ TEST(CApiTest, DeleteTest) {
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end());
std::string delete_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data);
assert(marshal == true);
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {0, 0, 0};
auto offset = PreDelete(segment, 3);
auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps);
auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps);
assert(del_res.error_code == Success);
DeleteCollection(collection);
@ -248,19 +254,17 @@ TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
int N = 10;
auto dataset = DataGen(col->get_schema(), N);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto insert_data = serialize(dataset.raw_);
// first insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
int64_t offset;
PreInsert(segment, N, &offset);
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(res.error_code == Success);
// second insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
PreInsert(segment, N, &offset);
res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(res.error_code == Success);
// create retrieve plan pks in {1, 2, 3}
@ -285,13 +289,11 @@ TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) {
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end());
std::string delete_data;
marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data);
assert(marshal == true);
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
offset = PreDelete(segment, 3);
auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps.data());
auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data());
assert(del_res.error_code == Success);
// retrieve pks in {1, 2, 3}
@ -320,11 +322,9 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
for (auto& [field_id, field_meta] : col->get_schema()->get_fields()) {
auto array = dataset.get_col(field_id);
std::string data;
auto marshal = google::protobuf::TextFormat::PrintToString(*array.get(), &data);
assert(marshal == true);
auto data = serialize(array.get());
auto load_info = CLoadFieldDataInfo{field_id.get(), data.c_str(), N};
auto load_info = CLoadFieldDataInfo{field_id.get(), data.data(), data.size(), N};
auto res = LoadFieldData(segment, load_info);
assert(res.error_code == Success);
@ -334,10 +334,8 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto ts_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta);
std::string ts_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*ts_array.get(), &ts_data);
assert(marshal == true);
auto load_info = CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.c_str(), N};
auto ts_data = serialize(ts_array.get());
auto load_info = CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.data(), ts_data.size(), N};
auto res = LoadFieldData(segment, load_info);
assert(res.error_code == Success);
auto count = GetRowCount(segment);
@ -345,10 +343,8 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto row_id_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
std::string row_is_data;
marshal = google::protobuf::TextFormat::PrintToString(*row_id_array.get(), &row_is_data);
assert(marshal == true);
load_info = CLoadFieldDataInfo{RowFieldID.get(), ts_data.c_str(), N};
auto row_id_data = serialize(row_id_array.get());
load_info = CLoadFieldDataInfo{RowFieldID.get(), row_id_data.data(), row_id_data.size(), N};
res = LoadFieldData(segment, load_info);
assert(res.error_code == Success);
count = GetRowCount(segment);
@ -376,14 +372,12 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) {
std::vector<int64_t> delete_row_ids = {1, 2, 3};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end());
std::string delete_data;
marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data);
assert(marshal == true);
auto delete_data = serialize(ids.get());
std::vector<uint64_t> delete_timestamps(3, dataset.timestamps_[N - 1]);
auto offset = PreDelete(segment, 3);
auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps.data());
auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data());
assert(del_res.error_code == Success);
// retrieve pks in {1, 2, 3}
@ -414,10 +408,8 @@ TEST(CApiTest, SearchTest) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* dsl_string = R"(
@ -478,10 +470,8 @@ TEST(CApiTest, SearchTestWithExpr) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
const char* serialized_expr_plan = R"(vector_anns: <
@ -534,10 +524,8 @@ TEST(CApiTest, RetrieveTestWithExpr) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
ASSERT_EQ(ins_res.error_code, Success);
// create retrieve plan "age in [0]"
@ -574,10 +562,8 @@ TEST(CApiTest, GetMemoryUsageInBytesTest) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(res.error_code == Success);
auto memory_usage_size = GetMemoryUsageInBytes(segment);
@ -596,14 +582,12 @@ TEST(CApiTest, GetDeletedCountTest) {
std::vector<int64_t> delete_row_ids = {100000, 100001, 100002};
auto ids = std::make_unique<IdArray>();
ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end());
std::string delete_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data);
assert(marshal == true);
auto delete_data = serialize(ids.get());
uint64_t delete_timestamps[] = {0, 0, 0};
auto offset = PreDelete(segment, 3);
auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps);
auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps);
assert(del_res.error_code == Success);
// TODO: assert(deleted_count == len(delete_row_ids))
@ -625,10 +609,8 @@ TEST(CApiTest, GetRowCountTest) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(res.error_code == Success);
auto row_count = GetRowCount(segment);
@ -696,10 +678,8 @@ TEST(CApiTest, ReduceRemoveDuplicates) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* dsl_string = R"(
@ -793,10 +773,8 @@ testReduceSearchWithExpr(int N, int topK, int num_queries) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
auto fmt = boost::format(R"(vector_anns: <
@ -986,10 +964,8 @@ TEST(CApiTest, Indexing_Without_Predicate) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* dsl_string = R"(
@ -1113,10 +1089,8 @@ TEST(CApiTest, Indexing_Expr_Without_Predicate) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* serialized_expr_plan = R"(vector_anns: <
@ -1235,10 +1209,8 @@ TEST(CApiTest, Indexing_With_float_Predicate_Range) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* dsl_string = R"({
@ -1376,11 +1348,9 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto insert_data = serialize(dataset.raw_);
auto ins_res =
Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
}
@ -1530,10 +1500,8 @@ TEST(CApiTest, Indexing_With_float_Predicate_Term) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* dsl_string = R"({
@ -1668,10 +1636,8 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* serialized_expr_plan = R"(
@ -1815,10 +1781,8 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* dsl_string = R"({
@ -1955,10 +1919,8 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* serialized_expr_plan = R"(vector_anns: <
@ -2108,10 +2070,8 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* dsl_string = R"({
@ -2252,10 +2212,8 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) {
int64_t offset;
PreInsert(segment, N, &offset);
std::string insert_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data);
assert(marshal == true);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str());
auto insert_data = serialize(dataset.raw_);
auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size());
assert(ins_res.error_code == Success);
const char* serialized_expr_plan = R"(vector_anns: <
@ -2400,11 +2358,9 @@ TEST(CApiTest, SealedSegmentTest) {
auto blob = (void*)(&ages[0]);
FieldMeta field_meta(FieldName("age"), FieldId(101), DataType::INT64);
auto array = CreateScalarDataArrayFrom(ages.data(), N, field_meta);
std::string age_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*array.get(), &age_data);
assert(marshal == true);
auto age_data = serialize(array.get());
auto load_info = CLoadFieldDataInfo{101, age_data.c_str(), N};
auto load_info = CLoadFieldDataInfo{101, age_data.data(), age_data.size(), N};
auto res = LoadFieldData(segment, load_info);
assert(res.error_code == Success);
@ -2431,21 +2387,15 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
std::string counter_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data);
assert(marshal == true);
auto counter_data = serialize(count_array.get());
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
std::string row_ids_data;
marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data);
assert(marshal == true);
auto row_ids_data = serialize(row_ids_array.get());
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
std::string timestamps_data;
marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data);
assert(marshal == true);
auto timestamps_data = serialize(timestamps_array.get());
const char* dsl_string = R"({
"bool": {
@ -2542,7 +2492,8 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
auto c_counter_field_data = CLoadFieldDataInfo{
101,
counter_data.c_str(),
counter_data.data(),
counter_data.size(),
N,
};
status = LoadFieldData(segment, c_counter_field_data);
@ -2550,7 +2501,8 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
auto c_id_field_data = CLoadFieldDataInfo{
0,
row_ids_data.c_str(),
row_ids_data.data(),
row_ids_data.size(),
N,
};
status = LoadFieldData(segment, c_id_field_data);
@ -2558,7 +2510,8 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) {
auto c_ts_field_data = CLoadFieldDataInfo{
1,
timestamps_data.c_str(),
timestamps_data.data(),
timestamps_data.size(),
N,
};
status = LoadFieldData(segment, c_ts_field_data);
@ -2598,28 +2551,20 @@ TEST(CApiTest, SealedSegment_search_without_predicates) {
auto query_ptr = vec_col.data() + 42000 * DIM;
auto vec_array = dataset.get_col(FieldId(100));
std::string vec_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*vec_array.get(), &vec_data);
assert(marshal == true);
auto vec_data = serialize(vec_array.get());
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
std::string counter_data;
marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data);
assert(marshal == true);
auto counter_data = serialize(count_array.get());
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
std::string row_ids_data;
marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data);
assert(marshal == true);
auto row_ids_data = serialize(row_ids_array.get());
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
std::string timestamps_data;
marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data);
assert(marshal == true);
auto timestamps_data = serialize(timestamps_array.get());
const char* dsl_string = R"(
{
@ -2640,7 +2585,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) {
auto c_vec_field_data = CLoadFieldDataInfo{
100,
vec_data.c_str(),
vec_data.data(),
vec_data.size(),
N,
};
auto status = LoadFieldData(segment, c_vec_field_data);
@ -2648,7 +2594,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) {
auto c_counter_field_data = CLoadFieldDataInfo{
101,
counter_data.c_str(),
counter_data.data(),
counter_data.size(),
N,
};
status = LoadFieldData(segment, c_counter_field_data);
@ -2656,7 +2603,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) {
auto c_id_field_data = CLoadFieldDataInfo{
0,
row_ids_data.c_str(),
row_ids_data.data(),
row_ids_data.size(),
N,
};
status = LoadFieldData(segment, c_id_field_data);
@ -2664,7 +2612,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) {
auto c_ts_field_data = CLoadFieldDataInfo{
1,
timestamps_data.c_str(),
timestamps_data.data(),
timestamps_data.size(),
N,
};
status = LoadFieldData(segment, c_ts_field_data);
@ -2716,21 +2665,15 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
auto counter_col = dataset.get_col<int64_t>(FieldId(101));
FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64);
auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta);
std::string counter_data;
auto marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data);
assert(marshal == true);
auto counter_data = serialize(count_array.get());
FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64);
auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta);
std::string row_ids_data;
marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data);
assert(marshal == true);
auto row_ids_data = serialize(row_ids_array.get());
FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64);
auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta);
std::string timestamps_data;
marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data);
assert(marshal == true);
auto timestamps_data = serialize(timestamps_array.get());
const char* serialized_expr_plan = R"(vector_anns: <
field_id: 100
@ -2840,7 +2783,8 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
auto c_counter_field_data = CLoadFieldDataInfo{
101,
counter_data.c_str(),
counter_data.data(),
counter_data.size(),
N,
};
status = LoadFieldData(segment, c_counter_field_data);
@ -2848,7 +2792,8 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
auto c_id_field_data = CLoadFieldDataInfo{
0,
row_ids_data.c_str(),
row_ids_data.data(),
row_ids_data.size(),
N,
};
status = LoadFieldData(segment, c_id_field_data);
@ -2856,7 +2801,8 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
auto c_ts_field_data = CLoadFieldDataInfo{
1,
timestamps_data.c_str(),
timestamps_data.data(),
timestamps_data.size(),
N,
};
status = LoadFieldData(segment, c_ts_field_data);
@ -2898,3 +2844,4 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) {
DeleteCollection(collection);
DeleteSegment(segment);
}

View File

@ -609,17 +609,6 @@ func (s *Segment) segmentPreDelete(numOfRecords int) int64 {
}
func (s *Segment) segmentInsert(offset int64, entityIDs []UniqueID, timestamps []Timestamp, record *segcorepb.InsertRecord) error {
/*
CStatus
Insert(CSegmentInterface c_segment,
long int reserved_offset,
signed long int size,
const long* primary_keys,
const unsigned long* timestamps,
void* raw_data,
int sizeof_per_row,
signed long int count);
*/
s.segPtrMu.RLock()
defer s.segPtrMu.RUnlock() // thread safe guaranteed by segCore, use RLock
if s.segmentType != segmentTypeGrowing {
@ -630,10 +619,10 @@ func (s *Segment) segmentInsert(offset int64, entityIDs []UniqueID, timestamps [
return errors.New("null seg core pointer")
}
insertRecordBlob := proto.MarshalTextString(record)
cInsertRecordBlob := C.CString(insertRecordBlob)
defer C.free(unsafe.Pointer(cInsertRecordBlob))
insertRecordBlob, err := proto.Marshal(record)
if err != nil {
return fmt.Errorf("failed to marshal insert record: %s", err)
}
var numOfRow = len(entityIDs)
var cOffset = C.int64_t(offset)
@ -646,7 +635,8 @@ func (s *Segment) segmentInsert(offset int64, entityIDs []UniqueID, timestamps [
cNumOfRows,
cEntityIdsPtr,
cTimestampsPtr,
cInsertRecordBlob)
(*C.uint8_t)(unsafe.Pointer(&insertRecordBlob[0])),
(C.uint64_t)(len(insertRecordBlob)))
if err := HandleCStatus(&status, "Insert failed"); err != nil {
return err
}
@ -709,11 +699,12 @@ func (s *Segment) segmentDelete(offset int64, entityIDs []primaryKey, timestamps
return fmt.Errorf("invalid data type of primary keys")
}
dataBlob := proto.MarshalTextString(ids)
cDataBlob := C.CString(dataBlob)
defer C.free(unsafe.Pointer(cDataBlob))
dataBlob, err := proto.Marshal(ids)
if err != nil {
return fmt.Errorf("failed to marshal ids: %s", err)
}
status := C.Delete(s.segmentPtr, cOffset, cSize, cDataBlob, cTimestampsPtr)
status := C.Delete(s.segmentPtr, cOffset, cSize, (*C.uint8_t)(unsafe.Pointer(&dataBlob[0])), (C.uint64_t)(len(dataBlob)), cTimestampsPtr)
if err := HandleCStatus(&status, "Delete failed"); err != nil {
return err
}
@ -737,21 +728,15 @@ func (s *Segment) segmentLoadFieldData(fieldID int64, rowCount int64, data *sche
return errors.New(errMsg)
}
dataBlob := proto.MarshalTextString(data)
dataBlob, err := proto.Marshal(data)
if err != nil {
return err
}
cDataBlob := C.CString(dataBlob)
defer C.free(unsafe.Pointer(cDataBlob))
/*
typedef struct CLoadFieldDataInfo {
int64_t field_id;
void* blob;
int64_t row_count;
} CLoadFieldDataInfo;
*/
loadInfo := C.CLoadFieldDataInfo{
field_id: C.int64_t(fieldID),
blob: cDataBlob,
blob: (*C.uint8_t)(unsafe.Pointer(&dataBlob[0])),
blob_size: C.uint64_t(len(dataBlob)),
row_count: C.int64_t(rowCount),
}
@ -805,15 +790,16 @@ func (s *Segment) segmentLoadDeletedRecord(primaryKeys []primaryKey, timestamps
return fmt.Errorf("invalid data type of primary keys")
}
idsBlob := proto.MarshalTextString(ids)
cIdsBlob := C.CString(idsBlob)
defer C.free(unsafe.Pointer(cIdsBlob))
idsBlob, err := proto.Marshal(ids)
if err != nil {
return err
}
loadInfo := C.CLoadDeletedRecordInfo{
timestamps: unsafe.Pointer(&timestamps[0]),
primary_keys: cIdsBlob,
row_count: C.int64_t(rowCount),
timestamps: unsafe.Pointer(&timestamps[0]),
primary_keys: (*C.uint8_t)(unsafe.Pointer(&idsBlob[0])),
primary_keys_size: C.uint64_t(len(idsBlob)),
row_count: C.int64_t(rowCount),
}
/*
CStatus