mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-10 07:49:29 +08:00
f4dd7c7efb
issue: #34123 Benchmark case: The benchmark runs the Go benchmark function `BenchmarkDeltalogFormat`, which is included in the files changed. It tests the performance of serializing and deserializing from two different data formats under a 10 million delete log dataset. Metrics: The benchmarks measure the average time taken per operation (ns/op), memory allocated per operation (MB/op), and the number of memory allocations per operation (allocs/op). | Test Name | Avg Time (ns/op) | Time Comparison | Memory Allocation (MB/op) | Memory Comparison | Allocation Count (allocs/op) | Allocation Comparison | |---------------------------------|------------------|-----------------|---------------------------|-------------------|------------------------------|------------------------| | one_string_format_reader | 2,781,990,000 | Baseline | 2,422 | Baseline | 20,336,539 | Baseline | | pk_ts_separate_format_reader | 480,682,639 | -82.72% | 1,765 | -27.14% | 20,396,958 | +0.30% | | one_string_format_writer | 5,483,436,041 | Baseline | 13,900 | Baseline | 70,057,473 | Baseline | | pk_and_ts_separate_format_writer| 798,591,584 | -85.43% | 2,178 | -84.34% | 30,270,488 | -56.78% | Both read and write operations show significant improvements in both speed and memory allocation. Signed-off-by: shaoting-huang <shaoting.huang@zilliz.com>
404 lines
12 KiB
Go
404 lines
12 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package storage
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"io"
|
|
|
|
"github.com/cockroachdb/errors"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/pkg/common"
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
)
|
|
|
|
// EventTypeCode represents event type by code
type EventTypeCode int8

// EventTypeCode definitions
const (
	DescriptorEventType EventTypeCode = iota
	InsertEventType
	DeleteEventType
	CreateCollectionEventType
	DropCollectionEventType
	CreatePartitionEventType
	DropPartitionEventType
	IndexFileEventType
	EventTypeEnd
)

// String returns the string representation of the event type code.
// A switch is used instead of a per-call map literal: the previous
// implementation allocated and populated the whole map on every call.
// Unknown codes (including EventTypeEnd) yield "InvalidEventType".
func (code EventTypeCode) String() string {
	switch code {
	case DescriptorEventType:
		return "DescriptorEventType"
	case InsertEventType:
		return "InsertEventType"
	case DeleteEventType:
		return "DeleteEventType"
	case CreateCollectionEventType:
		return "CreateCollectionEventType"
	case DropCollectionEventType:
		return "DropCollectionEventType"
	case CreatePartitionEventType:
		return "CreatePartitionEventType"
	case DropPartitionEventType:
		return "DropPartitionEventType"
	case IndexFileEventType:
		return "IndexFileEventType"
	default:
		return "InvalidEventType"
	}
}
|
|
|
|
// descriptorEvent is the leading event of a binlog file. It combines the
// fixed-size descriptor header with the descriptor data (the payload the
// header describes); both are embedded so their fields and methods are
// promoted onto descriptorEvent.
type descriptorEvent struct {
	descriptorEventHeader
	descriptorEventData
}
|
|
|
|
// GetMemoryUsageInBytes returns descriptor Event memory usage in bytes
|
|
func (event *descriptorEvent) GetMemoryUsageInBytes() int32 {
|
|
return event.descriptorEventHeader.GetMemoryUsageInBytes() + event.descriptorEventData.GetMemoryUsageInBytes()
|
|
}
|
|
|
|
// Write writes descriptor event into buffer
|
|
func (event *descriptorEvent) Write(buffer io.Writer) error {
|
|
err := event.descriptorEventData.FinishExtra()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
event.descriptorEventHeader.EventLength = event.descriptorEventHeader.GetMemoryUsageInBytes() + event.descriptorEventData.GetMemoryUsageInBytes()
|
|
event.descriptorEventHeader.NextPosition = int32(binary.Size(MagicNumber)) + event.descriptorEventHeader.EventLength
|
|
|
|
if err := event.descriptorEventHeader.Write(buffer); err != nil {
|
|
return err
|
|
}
|
|
if err := event.descriptorEventData.Write(buffer); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// EventWriter abstracts event writer
type EventWriter interface {
	PayloadWriterInterface
	// Finish set meta in header and no data can be added to event writer
	Finish() error
	// Close release resources
	Close()
	// Write serialize to buffer, should call Finish first
	Write(buffer *bytes.Buffer) error
	// GetMemoryUsageInBytes returns the total serialized size of the event
	// (header + event fixed part + payload buffer).
	GetMemoryUsageInBytes() (int32, error)
	// SetOffset sets the event's start position inside the binlog file,
	// used when finalizing the header's NextPosition.
	SetOffset(offset int32)
}
|
|
|
|
// baseEventWriter carries the state and serialization logic shared by all
// concrete event writers; each concrete writer embeds it and plugs in its
// event-specific size/serialization callbacks.
type baseEventWriter struct {
	eventHeader
	PayloadWriterInterface
	// isClosed ensures Close releases resources only once.
	isClosed bool
	// isFinish ensures Finish finalizes the header only once.
	isFinish bool
	// offset is the event's start position in the binlog file (see SetOffset).
	offset int32
	// getEventDataSize reports the size of the event-specific fixed part.
	getEventDataSize func() int32
	// writeEventData serializes the event-specific fixed part into the buffer.
	writeEventData func(buffer io.Writer) error
}
|
|
|
|
func (writer *baseEventWriter) GetMemoryUsageInBytes() (int32, error) {
|
|
data, err := writer.GetPayloadBufferFromWriter()
|
|
if err != nil {
|
|
return -1, err
|
|
}
|
|
size := writer.getEventDataSize() + writer.eventHeader.GetMemoryUsageInBytes() + int32(len(data))
|
|
return size, nil
|
|
}
|
|
|
|
func (writer *baseEventWriter) Write(buffer *bytes.Buffer) error {
|
|
if err := writer.eventHeader.Write(buffer); err != nil {
|
|
return err
|
|
}
|
|
if err := writer.writeEventData(buffer); err != nil {
|
|
return err
|
|
}
|
|
data, err := writer.GetPayloadBufferFromWriter()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := binary.Write(buffer, common.Endian, data); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (writer *baseEventWriter) Finish() error {
|
|
if !writer.isFinish {
|
|
writer.isFinish = true
|
|
if err := writer.FinishPayloadWriter(); err != nil {
|
|
return err
|
|
}
|
|
eventLength, err := writer.GetMemoryUsageInBytes()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
writer.EventLength = eventLength
|
|
writer.NextPosition = eventLength + writer.offset
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (writer *baseEventWriter) Close() {
|
|
if !writer.isClosed {
|
|
writer.isFinish = true
|
|
writer.isClosed = true
|
|
writer.ReleasePayloadWriter()
|
|
}
|
|
}
|
|
|
|
func (writer *baseEventWriter) SetOffset(offset int32) {
|
|
writer.offset = offset
|
|
}
|
|
|
|
// insertEventWriter writes insert events; pairs the shared base writer with
// insert-specific event data.
type insertEventWriter struct {
	baseEventWriter
	insertEventData
}

// deleteEventWriter writes delete events.
type deleteEventWriter struct {
	baseEventWriter
	deleteEventData
}

// createCollectionEventWriter writes create-collection events.
type createCollectionEventWriter struct {
	baseEventWriter
	createCollectionEventData
}

// dropCollectionEventWriter writes drop-collection events.
type dropCollectionEventWriter struct {
	baseEventWriter
	dropCollectionEventData
}

// createPartitionEventWriter writes create-partition events.
type createPartitionEventWriter struct {
	baseEventWriter
	createPartitionEventData
}

// dropPartitionEventWriter writes drop-partition events.
type dropPartitionEventWriter struct {
	baseEventWriter
	dropPartitionEventData
}

// indexFileEventWriter writes index-file events.
type indexFileEventWriter struct {
	baseEventWriter
	indexFileEventData
}
|
|
|
|
func newDescriptorEvent() *descriptorEvent {
|
|
header := newDescriptorEventHeader()
|
|
data := newDescriptorEventData()
|
|
return &descriptorEvent{
|
|
descriptorEventHeader: *header,
|
|
descriptorEventData: *data,
|
|
}
|
|
}
|
|
|
|
func NewBaseDescriptorEvent(collectionID int64, partitionID int64, segmentID int64) *descriptorEvent {
|
|
de := newDescriptorEvent()
|
|
de.CollectionID = collectionID
|
|
de.PartitionID = partitionID
|
|
de.SegmentID = segmentID
|
|
de.StartTimestamp = 0
|
|
de.EndTimestamp = 0
|
|
return de
|
|
}
|
|
|
|
func newInsertEventWriter(dataType schemapb.DataType, nullable bool, dim ...int) (*insertEventWriter, error) {
|
|
var payloadWriter PayloadWriterInterface
|
|
var err error
|
|
if typeutil.IsVectorType(dataType) && !typeutil.IsSparseFloatVectorType(dataType) {
|
|
if len(dim) != 1 {
|
|
return nil, fmt.Errorf("incorrect input numbers")
|
|
}
|
|
payloadWriter, err = NewPayloadWriter(dataType, nullable, dim[0])
|
|
} else {
|
|
payloadWriter, err = NewPayloadWriter(dataType, nullable)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
header := newEventHeader(InsertEventType)
|
|
data := newInsertEventData()
|
|
|
|
writer := &insertEventWriter{
|
|
baseEventWriter: baseEventWriter{
|
|
eventHeader: *header,
|
|
PayloadWriterInterface: payloadWriter,
|
|
isClosed: false,
|
|
isFinish: false,
|
|
},
|
|
insertEventData: *data,
|
|
}
|
|
writer.baseEventWriter.getEventDataSize = writer.insertEventData.GetEventDataFixPartSize
|
|
writer.baseEventWriter.writeEventData = writer.insertEventData.WriteEventData
|
|
return writer, nil
|
|
}
|
|
|
|
func newDeleteEventWriter(dataType schemapb.DataType) (*deleteEventWriter, error) {
|
|
payloadWriter, err := NewPayloadWriter(dataType, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
header := newEventHeader(DeleteEventType)
|
|
data := newDeleteEventData()
|
|
|
|
writer := &deleteEventWriter{
|
|
baseEventWriter: baseEventWriter{
|
|
eventHeader: *header,
|
|
PayloadWriterInterface: payloadWriter,
|
|
isClosed: false,
|
|
isFinish: false,
|
|
},
|
|
deleteEventData: *data,
|
|
}
|
|
writer.baseEventWriter.getEventDataSize = writer.deleteEventData.GetEventDataFixPartSize
|
|
writer.baseEventWriter.writeEventData = writer.deleteEventData.WriteEventData
|
|
return writer, nil
|
|
}
|
|
|
|
func newCreateCollectionEventWriter(dataType schemapb.DataType) (*createCollectionEventWriter, error) {
|
|
if dataType != schemapb.DataType_String && dataType != schemapb.DataType_Int64 {
|
|
return nil, errors.New("incorrect data type")
|
|
}
|
|
|
|
payloadWriter, err := NewPayloadWriter(dataType, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
header := newEventHeader(CreateCollectionEventType)
|
|
data := newCreateCollectionEventData()
|
|
|
|
writer := &createCollectionEventWriter{
|
|
baseEventWriter: baseEventWriter{
|
|
eventHeader: *header,
|
|
PayloadWriterInterface: payloadWriter,
|
|
isClosed: false,
|
|
isFinish: false,
|
|
},
|
|
createCollectionEventData: *data,
|
|
}
|
|
writer.baseEventWriter.getEventDataSize = writer.createCollectionEventData.GetEventDataFixPartSize
|
|
writer.baseEventWriter.writeEventData = writer.createCollectionEventData.WriteEventData
|
|
return writer, nil
|
|
}
|
|
|
|
func newDropCollectionEventWriter(dataType schemapb.DataType) (*dropCollectionEventWriter, error) {
|
|
if dataType != schemapb.DataType_String && dataType != schemapb.DataType_Int64 {
|
|
return nil, errors.New("incorrect data type")
|
|
}
|
|
|
|
payloadWriter, err := NewPayloadWriter(dataType, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
header := newEventHeader(DropCollectionEventType)
|
|
data := newDropCollectionEventData()
|
|
|
|
writer := &dropCollectionEventWriter{
|
|
baseEventWriter: baseEventWriter{
|
|
eventHeader: *header,
|
|
PayloadWriterInterface: payloadWriter,
|
|
isClosed: false,
|
|
isFinish: false,
|
|
},
|
|
dropCollectionEventData: *data,
|
|
}
|
|
writer.baseEventWriter.getEventDataSize = writer.dropCollectionEventData.GetEventDataFixPartSize
|
|
writer.baseEventWriter.writeEventData = writer.dropCollectionEventData.WriteEventData
|
|
return writer, nil
|
|
}
|
|
|
|
func newCreatePartitionEventWriter(dataType schemapb.DataType) (*createPartitionEventWriter, error) {
|
|
if dataType != schemapb.DataType_String && dataType != schemapb.DataType_Int64 {
|
|
return nil, errors.New("incorrect data type")
|
|
}
|
|
|
|
payloadWriter, err := NewPayloadWriter(dataType, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
header := newEventHeader(CreatePartitionEventType)
|
|
data := newCreatePartitionEventData()
|
|
|
|
writer := &createPartitionEventWriter{
|
|
baseEventWriter: baseEventWriter{
|
|
eventHeader: *header,
|
|
PayloadWriterInterface: payloadWriter,
|
|
isClosed: false,
|
|
isFinish: false,
|
|
},
|
|
createPartitionEventData: *data,
|
|
}
|
|
writer.baseEventWriter.getEventDataSize = writer.createPartitionEventData.GetEventDataFixPartSize
|
|
writer.baseEventWriter.writeEventData = writer.createPartitionEventData.WriteEventData
|
|
return writer, nil
|
|
}
|
|
|
|
func newDropPartitionEventWriter(dataType schemapb.DataType) (*dropPartitionEventWriter, error) {
|
|
if dataType != schemapb.DataType_String && dataType != schemapb.DataType_Int64 {
|
|
return nil, errors.New("incorrect data type")
|
|
}
|
|
|
|
payloadWriter, err := NewPayloadWriter(dataType, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
header := newEventHeader(DropPartitionEventType)
|
|
data := newDropPartitionEventData()
|
|
|
|
writer := &dropPartitionEventWriter{
|
|
baseEventWriter: baseEventWriter{
|
|
eventHeader: *header,
|
|
PayloadWriterInterface: payloadWriter,
|
|
isClosed: false,
|
|
isFinish: false,
|
|
},
|
|
dropPartitionEventData: *data,
|
|
}
|
|
writer.baseEventWriter.getEventDataSize = writer.dropPartitionEventData.GetEventDataFixPartSize
|
|
writer.baseEventWriter.writeEventData = writer.dropPartitionEventData.WriteEventData
|
|
return writer, nil
|
|
}
|
|
|
|
func newIndexFileEventWriter(dataType schemapb.DataType) (*indexFileEventWriter, error) {
|
|
payloadWriter, err := NewPayloadWriter(dataType, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
header := newEventHeader(IndexFileEventType)
|
|
data := newIndexFileEventData()
|
|
|
|
writer := &indexFileEventWriter{
|
|
baseEventWriter: baseEventWriter{
|
|
eventHeader: *header,
|
|
PayloadWriterInterface: payloadWriter,
|
|
isClosed: false,
|
|
isFinish: false,
|
|
},
|
|
indexFileEventData: *data,
|
|
}
|
|
writer.baseEventWriter.getEventDataSize = writer.indexFileEventData.GetEventDataFixPartSize
|
|
writer.baseEventWriter.writeEventData = writer.indexFileEventData.WriteEventData
|
|
|
|
return writer, nil
|
|
}
|