diff --git a/internal/core/src/common/type_c.h b/internal/core/src/common/type_c.h index 9306489126..470af5afd8 100644 --- a/internal/core/src/common/type_c.h +++ b/internal/core/src/common/type_c.h @@ -70,13 +70,15 @@ typedef struct CProto { typedef struct CLoadFieldDataInfo { int64_t field_id; - const char* blob; + const uint8_t* blob; + uint64_t blob_size; int64_t row_count; } CLoadFieldDataInfo; typedef struct CLoadDeletedRecordInfo { void* timestamps; - const char* primary_keys; + const uint8_t* primary_keys; + const uint64_t primary_keys_size; int64_t row_count; } CLoadDeletedRecordInfo; diff --git a/internal/core/src/segcore/segment_c.cpp b/internal/core/src/segcore/segment_c.cpp index 28f13b8672..70a0413e43 100644 --- a/internal/core/src/segcore/segment_c.cpp +++ b/internal/core/src/segcore/segment_c.cpp @@ -134,14 +134,13 @@ Insert(CSegmentInterface c_segment, int64_t size, const int64_t* row_ids, const uint64_t* timestamps, - const char* data_info) { + const uint8_t* data_info, + const uint64_t data_info_len) { try { auto segment = (milvus::segcore::SegmentGrowing*)c_segment; - auto proto = std::string(data_info); - Assert(!proto.empty()); auto insert_data = std::make_unique(); - auto suc = google::protobuf::TextFormat::ParseFromString(proto, insert_data.get()); - AssertInfo(suc, "unmarshal field data string failed"); + auto suc = insert_data->ParseFromArray(data_info, data_info_len); + AssertInfo(suc, "failed to parse insert data from records"); segment->Insert(reserved_offset, size, row_ids, timestamps, insert_data.get()); return milvus::SuccessCStatus(); @@ -162,14 +161,16 @@ PreInsert(CSegmentInterface c_segment, int64_t size, int64_t* offset) { } CStatus -Delete( - CSegmentInterface c_segment, int64_t reserved_offset, int64_t size, const char* ids, const uint64_t* timestamps) { +Delete(CSegmentInterface c_segment, + int64_t reserved_offset, + int64_t size, + const uint8_t* ids, + const uint64_t ids_size, + const uint64_t* timestamps) { auto segment = 
(milvus::segcore::SegmentInterface*)c_segment; - auto proto = std::string(ids); - Assert(!proto.empty()); auto pks = std::make_unique(); - auto suc = google::protobuf::TextFormat::ParseFromString(proto, pks.get()); - AssertInfo(suc, "unmarshal field data string failed"); + auto suc = pks->ParseFromArray(ids, ids_size); + AssertInfo(suc, "failed to parse pks from ids"); try { auto res = segment->Delete(reserved_offset, size, pks.get(), timestamps); return milvus::SuccessCStatus(); @@ -192,10 +193,8 @@ LoadFieldData(CSegmentInterface c_segment, CLoadFieldDataInfo load_field_data_in auto segment_interface = reinterpret_cast(c_segment); auto segment = dynamic_cast(segment_interface); AssertInfo(segment != nullptr, "segment conversion failed"); - auto proto = std::string(load_field_data_info.blob); - Assert(!proto.empty()); auto field_data = std::make_unique(); - auto suc = google::protobuf::TextFormat::ParseFromString(proto, field_data.get()); + auto suc = field_data->ParseFromArray(load_field_data_info.blob, load_field_data_info.blob_size); AssertInfo(suc, "unmarshal field data string failed"); auto load_info = LoadFieldDataInfo{load_field_data_info.field_id, field_data.get(), load_field_data_info.row_count}; @@ -211,10 +210,8 @@ LoadDeletedRecord(CSegmentInterface c_segment, CLoadDeletedRecordInfo deleted_re try { auto segment_interface = reinterpret_cast(c_segment); AssertInfo(segment_interface != nullptr, "segment conversion failed"); - auto proto = std::string(deleted_record_info.primary_keys); - Assert(!proto.empty()); auto pks = std::make_unique(); - auto suc = google::protobuf::TextFormat::ParseFromString(proto, pks.get()); + auto suc = pks->ParseFromArray(deleted_record_info.primary_keys, deleted_record_info.primary_keys_size); AssertInfo(suc, "unmarshal field data string failed"); auto load_info = LoadDeletedRecordInfo{deleted_record_info.timestamps, pks.get(), deleted_record_info.row_count}; diff --git a/internal/core/src/segcore/segment_c.h 
b/internal/core/src/segcore/segment_c.h index 4bd9eff20c..e5b0b02168 100644 --- a/internal/core/src/segcore/segment_c.h +++ b/internal/core/src/segcore/segment_c.h @@ -67,7 +67,8 @@ Insert(CSegmentInterface c_segment, int64_t size, const int64_t* row_ids, const uint64_t* timestamps, - const char* data_info); + const uint8_t* data_info, + const uint64_t data_info_len); CStatus PreInsert(CSegmentInterface c_segment, int64_t size, int64_t* offset); @@ -90,7 +91,12 @@ DropSealedSegmentIndex(CSegmentInterface c_segment, int64_t field_id); ////////////////////////////// interfaces for SegmentInterface ////////////////////////////// CStatus -Delete(CSegmentInterface c_segment, int64_t reserved_offset, int64_t size, const char* ids, const uint64_t* timestamps); +Delete(CSegmentInterface c_segment, + int64_t reserved_offset, + int64_t size, + const uint8_t* ids, + const uint64_t ids_size, + const uint64_t* timestamps); int64_t PreDelete(CSegmentInterface c_segment, int64_t size); diff --git a/internal/core/unittest/test_c_api.cpp b/internal/core/unittest/test_c_api.cpp index 1ec70e8ecc..2c9d42a856 100644 --- a/internal/core/unittest/test_c_api.cpp +++ b/internal/core/unittest/test_c_api.cpp @@ -198,6 +198,16 @@ TEST(CApiTest, SegmentTest) { DeleteSegment(segment); } +template <typename Message> +std::vector<uint8_t> +serialize(const Message* msg) { + auto l = msg->ByteSize(); + std::vector<uint8_t> ret(l); + auto ok = msg->SerializeToArray(ret.data(), l); + assert(ok); + return ret; +} + TEST(CApiTest, InsertTest) { auto c_collection = NewCollection(get_default_schema_config()); auto segment = NewSegment(c_collection, Growing, -1); @@ -209,10 +219,8 @@ TEST(CApiTest, InsertTest) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = 
serialize(dataset.raw_); + auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); DeleteCollection(c_collection); @@ -226,14 +234,12 @@ TEST(CApiTest, DeleteTest) { std::vector delete_row_ids = {100000, 100001, 100002}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); - std::string delete_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data); - assert(marshal == true); + auto delete_data = serialize(ids.get()); uint64_t delete_timestamps[] = {0, 0, 0}; auto offset = PreDelete(segment, 3); - auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps); + auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps); assert(del_res.error_code == Success); DeleteCollection(collection); @@ -248,19 +254,17 @@ TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) { int N = 10; auto dataset = DataGen(col->get_schema(), N); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); + auto insert_data = serialize(dataset.raw_); // first insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} int64_t offset; PreInsert(segment, N, &offset); - auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); // second insert, pks= {0, 1, 2, 3, 4, 5, 6, 7, 8, 9} PreInsert(segment, N, &offset); - res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), 
insert_data.size()); assert(res.error_code == Success); // create retrieve plan pks in {1, 2, 3} @@ -285,13 +289,11 @@ TEST(CApiTest, DeleteRepeatedPksFromGrowingSegment) { std::vector delete_row_ids = {1, 2, 3}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); - std::string delete_data; - marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data); - assert(marshal == true); + auto delete_data = serialize(ids.get()); std::vector delete_timestamps(3, dataset.timestamps_[N - 1]); offset = PreDelete(segment, 3); - auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps.data()); + auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // retrieve pks in {1, 2, 3} @@ -320,11 +322,9 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) { for (auto& [field_id, field_meta] : col->get_schema()->get_fields()) { auto array = dataset.get_col(field_id); - std::string data; - auto marshal = google::protobuf::TextFormat::PrintToString(*array.get(), &data); - assert(marshal == true); + auto data = serialize(array.get()); - auto load_info = CLoadFieldDataInfo{field_id.get(), data.c_str(), N}; + auto load_info = CLoadFieldDataInfo{field_id.get(), data.data(), data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); @@ -334,10 +334,8 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) { FieldMeta ts_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64); auto ts_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, ts_field_meta); - std::string ts_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*ts_array.get(), &ts_data); - assert(marshal == true); - auto load_info = CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.c_str(), N}; + auto ts_data = serialize(ts_array.get()); + auto load_info 
= CLoadFieldDataInfo{TimestampFieldID.get(), ts_data.data(), ts_data.size(), N}; auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); auto count = GetRowCount(segment); @@ -345,10 +343,8 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) { FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64); auto row_id_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta); - std::string row_is_data; - marshal = google::protobuf::TextFormat::PrintToString(*row_id_array.get(), &row_is_data); - assert(marshal == true); - load_info = CLoadFieldDataInfo{RowFieldID.get(), ts_data.c_str(), N}; + auto row_id_data = serialize(row_id_array.get()); + load_info = CLoadFieldDataInfo{RowFieldID.get(), row_id_data.data(), row_id_data.size(), N}; res = LoadFieldData(segment, load_info); assert(res.error_code == Success); count = GetRowCount(segment); @@ -376,14 +372,12 @@ TEST(CApiTest, DeleteRepeatedPksFromSealedSegment) { std::vector delete_row_ids = {1, 2, 3}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); - std::string delete_data; - marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data); - assert(marshal == true); + auto delete_data = serialize(ids.get()); std::vector delete_timestamps(3, dataset.timestamps_[N - 1]); auto offset = PreDelete(segment, 3); - auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps.data()); + auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps.data()); assert(del_res.error_code == Success); // retrieve pks in {1, 2, 3} @@ -414,10 +408,8 @@ TEST(CApiTest, SearchTest) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, 
dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); ASSERT_EQ(ins_res.error_code, Success); const char* dsl_string = R"( @@ -478,10 +470,8 @@ TEST(CApiTest, SearchTestWithExpr) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); ASSERT_EQ(ins_res.error_code, Success); const char* serialized_expr_plan = R"(vector_anns: < @@ -534,10 +524,8 @@ TEST(CApiTest, RetrieveTestWithExpr) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); ASSERT_EQ(ins_res.error_code, Success); // create retrieve plan "age in [0]" @@ -574,10 +562,8 @@ TEST(CApiTest, GetMemoryUsageInBytesTest) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data 
= serialize(dataset.raw_); + auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); auto memory_usage_size = GetMemoryUsageInBytes(segment); @@ -596,14 +582,12 @@ TEST(CApiTest, GetDeletedCountTest) { std::vector delete_row_ids = {100000, 100001, 100002}; auto ids = std::make_unique(); ids->mutable_int_id()->mutable_data()->Add(delete_row_ids.begin(), delete_row_ids.end()); - std::string delete_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*ids.get(), &delete_data); - assert(marshal == true); + auto delete_data = serialize(ids.get()); uint64_t delete_timestamps[] = {0, 0, 0}; auto offset = PreDelete(segment, 3); - auto del_res = Delete(segment, offset, 3, delete_data.c_str(), delete_timestamps); + auto del_res = Delete(segment, offset, 3, delete_data.data(), delete_data.size(), delete_timestamps); assert(del_res.error_code == Success); // TODO: assert(deleted_count == len(delete_row_ids)) @@ -625,10 +609,8 @@ TEST(CApiTest, GetRowCountTest) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(res.error_code == Success); auto row_count = GetRowCount(segment); @@ -696,10 +678,8 @@ TEST(CApiTest, ReduceRemoveDuplicates) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), 
dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"( @@ -793,10 +773,8 @@ testReduceSearchWithExpr(int N, int topK, int num_queries) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); auto fmt = boost::format(R"(vector_anns: < @@ -986,10 +964,8 @@ TEST(CApiTest, Indexing_Without_Predicate) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"( @@ -1113,10 +1089,8 @@ TEST(CApiTest, Indexing_Expr_Without_Predicate) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = 
serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* serialized_expr_plan = R"(vector_anns: < @@ -1235,10 +1209,8 @@ TEST(CApiTest, Indexing_With_float_Predicate_Range) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"({ @@ -1376,11 +1348,9 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Range) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); + auto insert_data = serialize(dataset.raw_); auto ins_res = - Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); } @@ -1530,10 +1500,8 @@ TEST(CApiTest, Indexing_With_float_Predicate_Term) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, 
dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"({ @@ -1668,10 +1636,8 @@ TEST(CApiTest, Indexing_Expr_With_float_Predicate_Term) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* serialized_expr_plan = R"( @@ -1815,10 +1781,8 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Range) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"({ @@ -1955,10 +1919,8 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Range) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), 
insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* serialized_expr_plan = R"(vector_anns: < @@ -2108,10 +2070,8 @@ TEST(CApiTest, Indexing_With_binary_Predicate_Term) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* dsl_string = R"({ @@ -2252,10 +2212,8 @@ TEST(CApiTest, Indexing_Expr_With_binary_Predicate_Term) { int64_t offset; PreInsert(segment, N, &offset); - std::string insert_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*dataset.raw_, &insert_data); - assert(marshal == true); - auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.c_str()); + auto insert_data = serialize(dataset.raw_); + auto ins_res = Insert(segment, offset, N, dataset.row_ids_.data(), dataset.timestamps_.data(), insert_data.data(), insert_data.size()); assert(ins_res.error_code == Success); const char* serialized_expr_plan = R"(vector_anns: < @@ -2400,11 +2358,9 @@ TEST(CApiTest, SealedSegmentTest) { auto blob = (void*)(&ages[0]); FieldMeta field_meta(FieldName("age"), FieldId(101), DataType::INT64); auto array = CreateScalarDataArrayFrom(ages.data(), N, field_meta); - std::string age_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*array.get(), &age_data); - assert(marshal == true); + auto age_data = serialize(array.get()); - auto load_info = CLoadFieldDataInfo{101, age_data.c_str(), N}; + auto load_info = CLoadFieldDataInfo{101, age_data.data(), age_data.size(), N}; 
auto res = LoadFieldData(segment, load_info); assert(res.error_code == Success); @@ -2431,21 +2387,15 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) { auto counter_col = dataset.get_col(FieldId(101)); FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64); auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta); - std::string counter_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data); - assert(marshal == true); + auto counter_data = serialize(count_array.get()); FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64); auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta); - std::string row_ids_data; - marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data); - assert(marshal == true); + auto row_ids_data = serialize(row_ids_array.get()); FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64); auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta); - std::string timestamps_data; - marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data); - assert(marshal == true); + auto timestamps_data = serialize(timestamps_array.get()); const char* dsl_string = R"({ "bool": { @@ -2542,7 +2492,8 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) { auto c_counter_field_data = CLoadFieldDataInfo{ 101, - counter_data.c_str(), + counter_data.data(), + counter_data.size(), N, }; status = LoadFieldData(segment, c_counter_field_data); @@ -2550,7 +2501,8 @@ TEST(CApiTest, SealedSegment_search_float_Predicate_Range) { auto c_id_field_data = CLoadFieldDataInfo{ 0, - row_ids_data.c_str(), + row_ids_data.data(), + row_ids_data.size(), N, }; status = LoadFieldData(segment, c_id_field_data); @@ -2558,7 +2510,8 @@ TEST(CApiTest, 
SealedSegment_search_float_Predicate_Range) { auto c_ts_field_data = CLoadFieldDataInfo{ 1, - timestamps_data.c_str(), + timestamps_data.data(), + timestamps_data.size(), N, }; status = LoadFieldData(segment, c_ts_field_data); @@ -2598,28 +2551,20 @@ TEST(CApiTest, SealedSegment_search_without_predicates) { auto query_ptr = vec_col.data() + 42000 * DIM; auto vec_array = dataset.get_col(FieldId(100)); - std::string vec_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*vec_array.get(), &vec_data); - assert(marshal == true); + auto vec_data = serialize(vec_array.get()); auto counter_col = dataset.get_col(FieldId(101)); FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64); auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta); - std::string counter_data; - marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data); - assert(marshal == true); + auto counter_data = serialize(count_array.get()); FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64); auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta); - std::string row_ids_data; - marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data); - assert(marshal == true); + auto row_ids_data = serialize(row_ids_array.get()); FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64); auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta); - std::string timestamps_data; - marshal = google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data); - assert(marshal == true); + auto timestamps_data = serialize(timestamps_array.get()); const char* dsl_string = R"( { @@ -2640,7 +2585,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) { auto c_vec_field_data = CLoadFieldDataInfo{ 100, - vec_data.c_str(), + vec_data.data(), + 
vec_data.size(), N, }; auto status = LoadFieldData(segment, c_vec_field_data); @@ -2648,7 +2594,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) { auto c_counter_field_data = CLoadFieldDataInfo{ 101, - counter_data.c_str(), + counter_data.data(), + counter_data.size(), N, }; status = LoadFieldData(segment, c_counter_field_data); @@ -2656,7 +2603,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) { auto c_id_field_data = CLoadFieldDataInfo{ 0, - row_ids_data.c_str(), + row_ids_data.data(), + row_ids_data.size(), N, }; status = LoadFieldData(segment, c_id_field_data); @@ -2664,7 +2612,8 @@ TEST(CApiTest, SealedSegment_search_without_predicates) { auto c_ts_field_data = CLoadFieldDataInfo{ 1, - timestamps_data.c_str(), + timestamps_data.data(), + timestamps_data.size(), N, }; status = LoadFieldData(segment, c_ts_field_data); @@ -2716,21 +2665,15 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) { auto counter_col = dataset.get_col(FieldId(101)); FieldMeta counter_field_meta(FieldName("counter"), FieldId(101), DataType::INT64); auto count_array = CreateScalarDataArrayFrom(counter_col.data(), N, counter_field_meta); - std::string counter_data; - auto marshal = google::protobuf::TextFormat::PrintToString(*count_array.get(), &counter_data); - assert(marshal == true); + auto counter_data = serialize(count_array.get()); FieldMeta row_id_field_meta(FieldName("RowID"), RowFieldID, DataType::INT64); auto row_ids_array = CreateScalarDataArrayFrom(dataset.row_ids_.data(), N, row_id_field_meta); - std::string row_ids_data; - marshal = google::protobuf::TextFormat::PrintToString(*row_ids_array.get(), &row_ids_data); - assert(marshal == true); + auto row_ids_data = serialize(row_ids_array.get()); FieldMeta timestamp_field_meta(FieldName("Timestamp"), TimestampFieldID, DataType::INT64); auto timestamps_array = CreateScalarDataArrayFrom(dataset.timestamps_.data(), N, timestamp_field_meta); - std::string timestamps_data; - marshal = 
google::protobuf::TextFormat::PrintToString(*timestamps_array.get(), &timestamps_data); - assert(marshal == true); + auto timestamps_data = serialize(timestamps_array.get()); const char* serialized_expr_plan = R"(vector_anns: < field_id: 100 @@ -2840,7 +2783,8 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) { auto c_counter_field_data = CLoadFieldDataInfo{ 101, - counter_data.c_str(), + counter_data.data(), + counter_data.size(), N, }; status = LoadFieldData(segment, c_counter_field_data); @@ -2848,7 +2792,8 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) { auto c_id_field_data = CLoadFieldDataInfo{ 0, - row_ids_data.c_str(), + row_ids_data.data(), + row_ids_data.size(), N, }; status = LoadFieldData(segment, c_id_field_data); @@ -2856,7 +2801,8 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) { auto c_ts_field_data = CLoadFieldDataInfo{ 1, - timestamps_data.c_str(), + timestamps_data.data(), + timestamps_data.size(), N, }; status = LoadFieldData(segment, c_ts_field_data); @@ -2898,3 +2844,4 @@ TEST(CApiTest, SealedSegment_search_float_With_Expr_Predicate_Range) { DeleteCollection(collection); DeleteSegment(segment); } + diff --git a/internal/querynode/segment.go b/internal/querynode/segment.go index b7f276d47f..370bedffe6 100644 --- a/internal/querynode/segment.go +++ b/internal/querynode/segment.go @@ -609,17 +609,6 @@ func (s *Segment) segmentPreDelete(numOfRecords int) int64 { } func (s *Segment) segmentInsert(offset int64, entityIDs []UniqueID, timestamps []Timestamp, record *segcorepb.InsertRecord) error { - /* - CStatus - Insert(CSegmentInterface c_segment, - long int reserved_offset, - signed long int size, - const long* primary_keys, - const unsigned long* timestamps, - void* raw_data, - int sizeof_per_row, - signed long int count); - */ s.segPtrMu.RLock() defer s.segPtrMu.RUnlock() // thread safe guaranteed by segCore, use RLock if s.segmentType != segmentTypeGrowing { @@ -630,10 +619,10 @@ 
func (s *Segment) segmentInsert(offset int64, entityIDs []UniqueID, timestamps [ return errors.New("null seg core pointer") } - insertRecordBlob := proto.MarshalTextString(record) - - cInsertRecordBlob := C.CString(insertRecordBlob) - defer C.free(unsafe.Pointer(cInsertRecordBlob)) + insertRecordBlob, err := proto.Marshal(record) + if err != nil { + return fmt.Errorf("failed to marshal insert record: %s", err) + } var numOfRow = len(entityIDs) var cOffset = C.int64_t(offset) @@ -646,7 +635,8 @@ func (s *Segment) segmentInsert(offset int64, entityIDs []UniqueID, timestamps [ cNumOfRows, cEntityIdsPtr, cTimestampsPtr, - cInsertRecordBlob) + (*C.uint8_t)(unsafe.Pointer(&insertRecordBlob[0])), + (C.uint64_t)(len(insertRecordBlob))) if err := HandleCStatus(&status, "Insert failed"); err != nil { return err } @@ -709,11 +699,12 @@ func (s *Segment) segmentDelete(offset int64, entityIDs []primaryKey, timestamps return fmt.Errorf("invalid data type of primary keys") } - dataBlob := proto.MarshalTextString(ids) - cDataBlob := C.CString(dataBlob) - defer C.free(unsafe.Pointer(cDataBlob)) + dataBlob, err := proto.Marshal(ids) + if err != nil { + return fmt.Errorf("failed to marshal ids: %s", err) + } - status := C.Delete(s.segmentPtr, cOffset, cSize, cDataBlob, cTimestampsPtr) + status := C.Delete(s.segmentPtr, cOffset, cSize, (*C.uint8_t)(unsafe.Pointer(&dataBlob[0])), (C.uint64_t)(len(dataBlob)), cTimestampsPtr) if err := HandleCStatus(&status, "Delete failed"); err != nil { return err } @@ -737,21 +728,15 @@ func (s *Segment) segmentLoadFieldData(fieldID int64, rowCount int64, data *sche return errors.New(errMsg) } - dataBlob := proto.MarshalTextString(data) + dataBlob, err := proto.Marshal(data) + if err != nil { + return err + } - cDataBlob := C.CString(dataBlob) - defer C.free(unsafe.Pointer(cDataBlob)) - - /* - typedef struct CLoadFieldDataInfo { - int64_t field_id; - void* blob; - int64_t row_count; - } CLoadFieldDataInfo; - */ loadInfo := C.CLoadFieldDataInfo{ 
field_id: C.int64_t(fieldID), - blob: cDataBlob, + blob: (*C.uint8_t)(unsafe.Pointer(&dataBlob[0])), + blob_size: C.uint64_t(len(dataBlob)), row_count: C.int64_t(rowCount), } @@ -805,15 +790,16 @@ func (s *Segment) segmentLoadDeletedRecord(primaryKeys []primaryKey, timestamps return fmt.Errorf("invalid data type of primary keys") } - idsBlob := proto.MarshalTextString(ids) - - cIdsBlob := C.CString(idsBlob) - defer C.free(unsafe.Pointer(cIdsBlob)) + idsBlob, err := proto.Marshal(ids) + if err != nil { + return err + } loadInfo := C.CLoadDeletedRecordInfo{ - timestamps: unsafe.Pointer(&timestamps[0]), - primary_keys: cIdsBlob, - row_count: C.int64_t(rowCount), + timestamps: unsafe.Pointer(&timestamps[0]), + primary_keys: (*C.uint8_t)(unsafe.Pointer(&idsBlob[0])), + primary_keys_size: C.uint64_t(len(idsBlob)), + row_count: C.int64_t(rowCount), } /* CStatus