// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package syncmgr

import (
	"context"
	"fmt"
	"strconv"

	"github.com/samber/lo"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/datanode/metacache"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/proto/etcdpb"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/pkg/log"
	"github.com/milvus-io/milvus/pkg/metrics"
	"github.com/milvus-io/milvus/pkg/util/merr"
	"github.com/milvus-io/milvus/pkg/util/paramtable"
	"github.com/milvus-io/milvus/pkg/util/timerecord"
)
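
// storageV1Serializer converts the buffered data of a SyncPack into storage v1
// binlog, statslog and deltalog blobs for a sync task.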
type storageV1Serializer struct {
	collectionID int64
	schema       *schemapb.CollectionSchema
	pkField      *schemapb.FieldSchema

	inCodec  *storage.InsertCodec
	delCodec *storage.DeleteCodec

	metacache  metacache.MetaCache
	metaWriter MetaWriter
}
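
// NewStorageSerializer creates a storageV1Serializer from the segment meta cache
// and meta writer. It fails if the collection schema has no primary key field.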
func NewStorageSerializer(metacache metacache.MetaCache, metaWriter MetaWriter) (*storageV1Serializer, error) {
	collectionID := metacache.Collection()
	schema := metacache.Schema()
	pkField := lo.FindOrElse(schema.GetFields(), nil, func(field *schemapb.FieldSchema) bool { return field.GetIsPrimaryKey() })
	if pkField == nil {
		return nil, merr.WrapErrServiceInternal("cannot find pk field")
	}
	meta := &etcdpb.CollectionMeta{
		Schema: schema,
		ID:     collectionID,
	}
	inCodec := storage.NewInsertCodecWithSchema(meta)
	return &storageV1Serializer{
		collectionID: collectionID,
		schema:       schema,
		pkField:      pkField,

		inCodec:    inCodec,
		delCodec:   storage.NewDeleteCodec(),
		metacache:  metacache,
		metaWriter: metaWriter,
	}, nil
}
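
// EncodeBuffer serializes the insert data, primary key statslog and delete data
// carried by the SyncPack into blobs, updates the per-segment pk stats in the
// meta cache, and returns a sync task populated with the encoded payloads.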
func (s *storageV1Serializer) EncodeBuffer(ctx context.Context, pack *SyncPack) (Task, error) {
	task := NewSyncTask()
	tr := timerecord.NewTimeRecorder("storage_serializer")

	log := log.Ctx(ctx).With(
		zap.Int64("segmentID", pack.segmentID),
		zap.Int64("collectionID", pack.collectionID),
		zap.String("channel", pack.channelName),
	)

	if pack.insertData != nil {
		memSize := make(map[int64]int64)
		for fieldID, fieldData := range pack.insertData.Data {
			memSize[fieldID] = int64(fieldData.GetMemorySize())
		}
		task.binlogMemsize = memSize

		binlogBlobs, err := s.serializeBinlog(ctx, pack)
		if err != nil {
			log.Warn("failed to serialize binlog", zap.Error(err))
			return nil, err
		}
		task.binlogBlobs = binlogBlobs

		singlePKStats, batchStatsBlob, err := s.serializeStatslog(pack)
		if err != nil {
			log.Warn("failed to serialize statslog", zap.Error(err))
			return nil, err
		}

		task.batchStatsBlob = batchStatsBlob
		s.metacache.UpdateSegments(metacache.RollStats(singlePKStats), metacache.WithSegmentIDs(pack.segmentID))
	}

	if pack.isFlush {
		if pack.level != datapb.SegmentLevel_L0 {
			mergedStatsBlob, err := s.serializeMergedPkStats(pack)
			if err != nil {
				log.Warn("failed to serialize merged stats log", zap.Error(err))
				return nil, err
			}
			task.mergedStatsBlob = mergedStatsBlob
		}

		task.WithFlush()
	}

	if pack.deltaData != nil {
		deltaBlob, err := s.serializeDeltalog(pack)
		if err != nil {
			log.Warn("failed to serialize delta log", zap.Error(err))
			return nil, err
		}
		task.deltaBlob = deltaBlob
		task.deltaRowCount = pack.deltaData.RowCount
	}
	if pack.isDrop {
		task.WithDrop()
	}

	s.setTaskMeta(task, pack)

	metrics.DataNodeEncodeBufferLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID()), pack.level.String()).Observe(float64(tr.RecordSpan().Milliseconds()))
	return task, nil
}
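
// setTaskMeta copies collection, partition, channel, segment and checkpoint
// information from the SyncPack onto the task, wires in the meta cache and
// meta writer, and installs the failure callback.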
func (s *storageV1Serializer) setTaskMeta(task *SyncTask, pack *SyncPack) {
	task.WithCollectionID(pack.collectionID).
		WithPartitionID(pack.partitionID).
		WithChannelName(pack.channelName).
		WithSegmentID(pack.segmentID).
		WithBatchSize(pack.batchSize).
		WithSchema(s.metacache.Schema()).
		WithStartPosition(pack.startPosition).
		WithCheckpoint(pack.checkpoint).
		WithLevel(pack.level).
		WithTimeRange(pack.tsFrom, pack.tsTo).
		WithMetaCache(s.metacache).
		WithMetaWriter(s.metaWriter).
		WithFailureCallback(func(err error) {
			// TODO could change to unsub channel in the future
			panic(err)
		})
}
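
// serializeBinlog encodes the insert data of the pack into per-field binlog
// blobs, keyed by the field ID parsed from each blob key.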
func (s *storageV1Serializer) serializeBinlog(ctx context.Context, pack *SyncPack) (map[int64]*storage.Blob, error) {
	blobs, err := s.inCodec.Serialize(pack.partitionID, pack.segmentID, pack.insertData)
	if err != nil {
		return nil, err
	}

	result := make(map[int64]*storage.Blob)
	for _, blob := range blobs {
		fieldID, err := strconv.ParseInt(blob.GetKey(), 10, 64)
		if err != nil {
			log.Error("serialize buffer failed, cannot parse blob key to fieldID", zap.Error(err))
			return nil, err
		}

		result[fieldID] = blob
	}
	return result, nil
}
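
// serializeStatslog builds primary key statistics for the current batch and
// encodes them into a statslog blob.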
func (s *storageV1Serializer) serializeStatslog(pack *SyncPack) (*storage.PrimaryKeyStats, *storage.Blob, error) {
	pkFieldData := pack.insertData.Data[s.pkField.GetFieldID()]
	rowNum := int64(pkFieldData.RowNum())

	stats, err := storage.NewPrimaryKeyStats(s.pkField.GetFieldID(), int64(s.pkField.GetDataType()), rowNum)
	if err != nil {
		return nil, nil, err
	}
	stats.UpdateByMsgs(pkFieldData)

	blob, err := s.inCodec.SerializePkStats(stats, pack.batchSize)
	if err != nil {
		return nil, nil, err
	}
	return stats, blob, nil
}
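
// serializeMergedPkStats merges the historical primary key statistics of the
// segment from the meta cache and encodes them into a single stats blob.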
func (s *storageV1Serializer) serializeMergedPkStats(pack *SyncPack) (*storage.Blob, error) {
	segment, ok := s.metacache.GetSegmentByID(pack.segmentID)
	if !ok {
		return nil, merr.WrapErrSegmentNotFound(pack.segmentID)
	}

	return s.inCodec.SerializePkStatsList(lo.Map(segment.GetHistory(), func(pks *storage.PkStatistics, _ int) *storage.PrimaryKeyStats {
		return &storage.PrimaryKeyStats{
			FieldID: s.pkField.GetFieldID(),
			MaxPk:   pks.MaxPK,
			MinPk:   pks.MinPK,
			BF:      pks.PkFilter,
			PkType:  int64(s.pkField.GetDataType()),
		}
	}), segment.NumOfRows())
}
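
// serializeDeltalog encodes the delete data of the pack into a deltalog blob.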
func (s *storageV1Serializer) serializeDeltalog(pack *SyncPack) (*storage.Blob, error) {
	return s.delCodec.Serialize(pack.collectionID, pack.partitionID, pack.segmentID, pack.deltaData)
}