package writebuffer

import (
	"context"

	"github.com/samber/lo"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
	"github.com/milvus-io/milvus/internal/allocator"
	"github.com/milvus-io/milvus/internal/datanode/io"
	"github.com/milvus-io/milvus/internal/datanode/metacache"
	"github.com/milvus-io/milvus/internal/datanode/syncmgr"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/pkg/common"
	"github.com/milvus-io/milvus/pkg/mq/msgstream"
	"github.com/milvus-io/milvus/pkg/util/conc"
	"github.com/milvus-io/milvus/pkg/util/merr"
	"github.com/milvus-io/milvus/pkg/util/paramtable"
	"github.com/milvus-io/milvus/pkg/util/retry"
	"github.com/milvus-io/milvus/pkg/util/typeutil"
)
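
// l0WriteBuffer buffers insert data per target segment and routes all delete
// records on the channel into per-partition level-zero (L0) delta segments.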
type l0WriteBuffer struct {
	*writeBufferBase

	l0Segments  map[int64]int64 // partitionID => L0 segment ID
	l0partition map[int64]int64 // L0 segment ID => partitionID

	syncMgr     syncmgr.SyncManager
	idAllocator allocator.Interface
}
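
// NewL0WriteBuffer creates a WriteBuffer that writes deletes into L0 segments.
// option.idAllocator must be set, since L0 segment IDs are allocated lazily.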
func NewL0WriteBuffer(channel string, metacache metacache.MetaCache, storageV2Cache *metacache.StorageV2Cache, syncMgr syncmgr.SyncManager, option *writeBufferOption) (WriteBuffer, error) {
	if option.idAllocator == nil {
		return nil, merr.WrapErrServiceInternal("id allocator is nil when creating l0 write buffer")
	}
	base, err := newWriteBufferBase(channel, metacache, storageV2Cache, syncMgr, option)
	if err != nil {
		return nil, err
	}
	return &l0WriteBuffer{
		l0Segments:      make(map[int64]int64),
		l0partition:     make(map[int64]int64),
		writeBufferBase: base,
		syncMgr:         syncMgr,
		idAllocator:     option.idAllocator,
	}, nil
}
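
// dispatchDeleteMsgs routes delete records into the L0 segment of each target
// partition. Primary keys are checked against the bloom filters of existing
// segments and against the current insert batches, so only deletes that may
// hit data on this channel are buffered.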
func (wb *l0WriteBuffer) dispatchDeleteMsgs(groups []*inData, deleteMsgs []*msgstream.DeleteMsg, startPos, endPos *msgpb.MsgPosition) {
	batchSize := paramtable.Get().CommonCfg.BloomFilterApplyBatchSize.GetAsInt()
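
	// split checks one batch of primary keys against the bloom filters of the
	// partition's segments and against the in-flight insert batches, returning
	// one hit flag per key.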
	split := func(pks []storage.PrimaryKey, pkTss []uint64, partitionSegments []*metacache.SegmentInfo, partitionGroups []*inData) []bool {
		lc := storage.NewBatchLocationsCache(pks)

		// use hits to cache the per-key result across segments and insert batches
		hits := make([]bool, len(pks))

		for _, segment := range partitionSegments {
			if segment.CompactTo() != 0 {
				continue
			}
			hits = segment.GetBloomFilterSet().BatchPkExistWithHits(lc, hits)
		}

		for _, inData := range partitionGroups {
			hits = inData.batchPkExists(pks, pkTss, hits)
		}

		return hits
	}

	type BatchApplyRet = struct {
		// index of the delete msg in deleteMsgs
		DeleteDataIdx int
		// start index of this batch within the delete msg
		StartIdx int
		Hits     []bool
	}

	// parse the raw ID columns into primary keys
	pksInDeleteMsgs := lo.Map(deleteMsgs, func(delMsg *msgstream.DeleteMsg, _ int) []storage.PrimaryKey {
		return storage.ParseIDs2PrimaryKeys(delMsg.GetPrimaryKeys())
	})
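
	// apply the bloom-filter checks in parallel on the shared BF-apply pool;
	// each task records its hits in retMap keyed by a per-batch index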
	retIdx := 0
	retMap := typeutil.NewConcurrentMap[int, *BatchApplyRet]()
	pool := io.GetBFApplyPool()
	var futures []*conc.Future[any]
	for didx, delMsg := range deleteMsgs {
		pks := pksInDeleteMsgs[didx]
		pkTss := delMsg.GetTimestamps()
		partitionSegments := wb.metaCache.GetSegmentsBy(metacache.WithPartitionID(delMsg.PartitionID),
			metacache.WithSegmentState(commonpb.SegmentState_Growing, commonpb.SegmentState_Flushing, commonpb.SegmentState_Flushed))
		partitionGroups := lo.Filter(groups, func(inData *inData, _ int) bool {
			return delMsg.GetPartitionID() == common.AllPartitionsID || delMsg.GetPartitionID() == inData.partitionID
		})
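
		// submit one task per batch of at most batchSize primary keys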
		for idx := 0; idx < len(pks); idx += batchSize {
			startIdx := idx
			endIdx := idx + batchSize
			if endIdx > len(pks) {
				endIdx = len(pks)
			}
			retIdx += 1
			tmpRetIdx := retIdx
			deleteDataId := didx
			future := pool.Submit(func() (any, error) {
				hits := split(pks[startIdx:endIdx], pkTss[startIdx:endIdx], partitionSegments, partitionGroups)
				retMap.Insert(tmpRetIdx, &BatchApplyRet{
					DeleteDataIdx: deleteDataId,
					StartIdx:      startIdx,
					Hits:          hits,
				})
				return nil, nil
			})
			futures = append(futures, future)
		}
	}
	conc.AwaitAll(futures...)
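
	// for each finished batch, buffer the delete entries whose primary keys were
	// hit into the L0 segment of the owning partition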
	retMap.Range(func(key int, value *BatchApplyRet) bool {
		l0SegmentID := wb.getL0SegmentID(deleteMsgs[value.DeleteDataIdx].GetPartitionID(), startPos)
		pks := pksInDeleteMsgs[value.DeleteDataIdx]
		pkTss := deleteMsgs[value.DeleteDataIdx].GetTimestamps()

		var deletePks []storage.PrimaryKey
		var deleteTss []typeutil.Timestamp
		for i, hit := range value.Hits {
			if hit {
				deletePks = append(deletePks, pks[value.StartIdx+i])
				deleteTss = append(deleteTss, pkTss[value.StartIdx+i])
			}
		}
		if len(deletePks) > 0 {
			wb.bufferDelete(l0SegmentID, deletePks, deleteTss, startPos, endPos)
		}
		return true
	})
}
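
// BufferData buffers the given insert and delete messages under the write buffer
// lock, updates the pk oracle and the buffer checkpoint, and triggers sync.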
func (wb *l0WriteBuffer) BufferData(insertMsgs []*msgstream.InsertMsg, deleteMsgs []*msgstream.DeleteMsg, startPos, endPos *msgpb.MsgPosition) error {
	wb.mut.Lock()
	defer wb.mut.Unlock()

	groups, err := wb.prepareInsert(insertMsgs)
	if err != nil {
		return err
	}

	// buffer insert data and add the segment if it does not exist yet
	for _, inData := range groups {
		err := wb.bufferInsert(inData, startPos, endPos)
		if err != nil {
			return err
		}
	}

	// distribute delete msgs: check the bloom filters of existing segments and of
	// the current insert batch to decide which delete records to buffer
	wb.dispatchDeleteMsgs(groups, deleteMsgs, startPos, endPos)

	// update pk oracle
	for _, inData := range groups {
		// the segment shall always exist after bufferInsert
		segments := wb.metaCache.GetSegmentsBy(metacache.WithSegmentIDs(inData.segmentID))
		for _, segment := range segments {
			for _, fieldData := range inData.pkField {
				err := segment.GetBloomFilterSet().UpdatePKRange(fieldData)
				if err != nil {
					return err
				}
			}
		}
	}

	// update buffer last checkpoint
	wb.checkpoint = endPos
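
	// drop the L0 bookkeeping for segments handed to sync, so the next delete for
	// that partition allocates a fresh L0 segment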
	segmentsSync := wb.triggerSync()
	for _, segment := range segmentsSync {
		partition, ok := wb.l0partition[segment]
		if ok {
			delete(wb.l0partition, segment)
			delete(wb.l0Segments, partition)
		}
	}

	wb.cleanupCompactedSegments()
	return nil
}
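
// getL0SegmentID returns the ID of the L0 segment for the given partition on this
// channel, allocating and registering a new growing L0 segment on first use.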
func (wb *l0WriteBuffer) getL0SegmentID(partitionID int64, startPos *msgpb.MsgPosition) int64 {
	log := wb.logger
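	// L0 segments are allocated lazily: the first delete routed to a partition
	// allocates a segment ID (with retry) and registers a growing L0 segment in
	// the meta cache.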
	segmentID, ok := wb.l0Segments[partitionID]
	if !ok {
		err := retry.Do(context.Background(), func() error {
			var err error
			segmentID, err = wb.idAllocator.AllocOne()
			return err
		})
		if err != nil {
			log.Error("failed to allocate l0 segment ID", zap.Error(err))
			panic(err)
		}
		wb.l0Segments[partitionID] = segmentID
		wb.l0partition[segmentID] = partitionID
		wb.metaCache.AddSegment(&datapb.SegmentInfo{
			ID:            segmentID,
			PartitionID:   partitionID,
			CollectionID:  wb.collectionID,
			InsertChannel: wb.channelName,
			StartPosition: startPos,
			State:         commonpb.SegmentState_Growing,
			Level:         datapb.SegmentLevel_L0,
		}, func(_ *datapb.SegmentInfo) *metacache.BloomFilterSet { return metacache.NewBloomFilterSet() }, metacache.SetStartPosRecorded(false))
		log.Info("Add a new level zero segment",
			zap.Int64("segmentID", segmentID),
			zap.String("level", datapb.SegmentLevel_L0.String()),
			zap.Any("start position", startPos),
		)
	}
	return segmentID
}