// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package datacoord import ( "time" "github.com/golang/protobuf/proto" "go.uber.org/zap" "github.com/milvus-io/milvus-proto/go-api/commonpb" "github.com/milvus-io/milvus-proto/go-api/msgpb" "github.com/milvus-io/milvus/internal/metastore/model" "github.com/milvus-io/milvus/internal/proto/datapb" "github.com/milvus-io/milvus/pkg/log" ) // SegmentsInfo wraps a map, which maintains ID to SegmentInfo relation type SegmentsInfo struct { segments map[UniqueID]*SegmentInfo } // SegmentInfo wraps datapb.SegmentInfo and patches some extra info on it type SegmentInfo struct { *datapb.SegmentInfo segmentIndexes map[UniqueID]*model.SegmentIndex currRows int64 allocations []*Allocation lastFlushTime time.Time isCompacting bool // a cache to avoid calculate twice size int64 lastWrittenTime time.Time } // NewSegmentInfo create `SegmentInfo` wrapper from `datapb.SegmentInfo` // assign current rows to last checkpoint and pre-allocate `allocations` slice // Note that the allocation information is not preserved, // the worst case scenario is to have a segment with twice size we expects func NewSegmentInfo(info *datapb.SegmentInfo) *SegmentInfo { return &SegmentInfo{ SegmentInfo: info, segmentIndexes: make(map[UniqueID]*model.SegmentIndex), currRows: info.GetNumOfRows(), allocations: make([]*Allocation, 0, 16), lastFlushTime: time.Now().Add(-1 * flushInterval), // A growing segment from recovery can be also considered idle. lastWrittenTime: getZeroTime(), } } // NewSegmentsInfo creates a `SegmentsInfo` instance, which makes sure internal map is initialized // note that no mutex is wrapped so external concurrent control is needed func NewSegmentsInfo() *SegmentsInfo { return &SegmentsInfo{segments: make(map[UniqueID]*SegmentInfo)} } // GetSegment returns SegmentInfo func (s *SegmentsInfo) GetSegment(segmentID UniqueID) *SegmentInfo { segment, ok := s.segments[segmentID] if !ok { return nil } return segment } // GetSegments iterates internal map and returns all SegmentInfo in a slice // no deep copy applied func (s *SegmentsInfo) GetSegments() []*SegmentInfo { segments := make([]*SegmentInfo, 0, len(s.segments)) for _, segment := range s.segments { segments = append(segments, segment) } return segments } // DropSegment deletes provided segmentID // no extra method is taken when segmentID not exists func (s *SegmentsInfo) DropSegment(segmentID UniqueID) { delete(s.segments, segmentID) } // SetSegment sets SegmentInfo with segmentID, perform overwrite if already exists func (s *SegmentsInfo) SetSegment(segmentID UniqueID, segment *SegmentInfo) { s.segments[segmentID] = segment } // SetSegmentIndex sets SegmentIndex with segmentID, perform overwrite if already exists func (s *SegmentsInfo) SetSegmentIndex(segmentID UniqueID, segIndex *model.SegmentIndex) { segment, ok := s.segments[segmentID] if !ok { log.Warn("segment missing for set segment index", zap.Int64("segmentID", segmentID), zap.Int64("indexID", segIndex.IndexID), ) return } segment = segment.Clone() if segment.segmentIndexes == nil { segment.segmentIndexes = make(map[UniqueID]*model.SegmentIndex) } segment.segmentIndexes[segIndex.IndexID] = segIndex s.segments[segmentID] = segment } func (s *SegmentsInfo) DropSegmentIndex(segmentID UniqueID, indexID UniqueID) { if _, ok := s.segments[segmentID]; ok { delete(s.segments[segmentID].segmentIndexes, indexID) } } // SetRowCount sets rowCount info for SegmentInfo with provided segmentID // if SegmentInfo not found, do nothing func (s *SegmentsInfo) SetRowCount(segmentID UniqueID, rowCount int64) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(SetRowCount(rowCount)) } } // SetState sets Segment State info for SegmentInfo with provided segmentID // if SegmentInfo not found, do nothing func (s *SegmentsInfo) SetState(segmentID UniqueID, state commonpb.SegmentState) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(SetState(state)) } } // SetIsImporting sets the import status for a segment. func (s *SegmentsInfo) SetIsImporting(segmentID UniqueID, isImporting bool) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(SetIsImporting(isImporting)) } } // SetDmlPosition sets DmlPosition info (checkpoint for recovery) for SegmentInfo with provided segmentID // if SegmentInfo not found, do nothing func (s *SegmentsInfo) SetDmlPosition(segmentID UniqueID, pos *msgpb.MsgPosition) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(SetDmlPosition(pos)) } } // SetStartPosition sets StartPosition info (recovery info when no checkout point found) for SegmentInfo with provided segmentID // if SegmentInfo not found, do nothing func (s *SegmentsInfo) SetStartPosition(segmentID UniqueID, pos *msgpb.MsgPosition) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(SetStartPosition(pos)) } } // SetAllocations sets allocations for segment with specified id // if the segment id is not found, do nothing // uses `ShadowClone` since internal SegmentInfo is not changed func (s *SegmentsInfo) SetAllocations(segmentID UniqueID, allocations []*Allocation) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.ShadowClone(SetAllocations(allocations)) } } // AddAllocation adds a new allocation to specified segment // if the segment is not found, do nothing // uses `Clone` since internal SegmentInfo's LastExpireTime is changed func (s *SegmentsInfo) AddAllocation(segmentID UniqueID, allocation *Allocation) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(AddAllocation(allocation)) } } // SetCurrentRows sets rows count for segment // if the segment is not found, do nothing // uses `ShadowClone` since internal SegmentInfo is not changed func (s *SegmentsInfo) SetCurrentRows(segmentID UniqueID, rows int64) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.ShadowClone(SetCurrentRows(rows)) } } // SetBinlogs sets binlog paths for segment // if the segment is not found, do nothing // uses `Clone` since internal SegmentInfo's Binlogs is changed func (s *SegmentsInfo) SetBinlogs(segmentID UniqueID, binlogs []*datapb.FieldBinlog) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(SetBinlogs(binlogs)) } } // SetFlushTime sets flush time for segment // if the segment is not found, do nothing // uses `ShadowClone` since internal SegmentInfo is not changed func (s *SegmentsInfo) SetFlushTime(segmentID UniqueID, t time.Time) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.ShadowClone(SetFlushTime(t)) } } // AddSegmentBinlogs adds binlogs for segment // if the segment is not found, do nothing // uses `Clone` since internal SegmentInfo's Binlogs is changed func (s *SegmentsInfo) AddSegmentBinlogs(segmentID UniqueID, field2Binlogs map[UniqueID][]*datapb.Binlog) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.Clone(addSegmentBinlogs(field2Binlogs)) } } // SetIsCompacting sets compaction status for segment func (s *SegmentsInfo) SetIsCompacting(segmentID UniqueID, isCompacting bool) { if segment, ok := s.segments[segmentID]; ok { s.segments[segmentID] = segment.ShadowClone(SetIsCompacting(isCompacting)) } } // Clone deep clone the segment info and return a new instance func (s *SegmentInfo) Clone(opts ...SegmentInfoOption) *SegmentInfo { info := proto.Clone(s.SegmentInfo).(*datapb.SegmentInfo) segmentIndexes := make(map[UniqueID]*model.SegmentIndex, len(s.segmentIndexes)) for indexID, segIdx := range s.segmentIndexes { segmentIndexes[indexID] = model.CloneSegmentIndex(segIdx) } cloned := &SegmentInfo{ SegmentInfo: info, segmentIndexes: segmentIndexes, currRows: s.currRows, allocations: s.allocations, lastFlushTime: s.lastFlushTime, isCompacting: s.isCompacting, //cannot copy size, since binlog may be changed lastWrittenTime: s.lastWrittenTime, } for _, opt := range opts { opt(cloned) } return cloned } // ShadowClone shadow clone the segment and return a new instance func (s *SegmentInfo) ShadowClone(opts ...SegmentInfoOption) *SegmentInfo { segmentIndexes := make(map[UniqueID]*model.SegmentIndex, len(s.segmentIndexes)) for indexID, segIdx := range s.segmentIndexes { segmentIndexes[indexID] = model.CloneSegmentIndex(segIdx) } cloned := &SegmentInfo{ SegmentInfo: s.SegmentInfo, segmentIndexes: segmentIndexes, currRows: s.currRows, allocations: s.allocations, lastFlushTime: s.lastFlushTime, isCompacting: s.isCompacting, size: s.size, lastWrittenTime: s.lastWrittenTime, } for _, opt := range opts { opt(cloned) } return cloned } // SegmentInfoOption is the option to set fields in segment info type SegmentInfoOption func(segment *SegmentInfo) // SetRowCount is the option to set row count for segment info func SetRowCount(rowCount int64) SegmentInfoOption { return func(segment *SegmentInfo) { segment.NumOfRows = rowCount } } // SetExpireTime is the option to set expire time for segment info func SetExpireTime(expireTs Timestamp) SegmentInfoOption { return func(segment *SegmentInfo) { segment.LastExpireTime = expireTs } } // SetState is the option to set state for segment info func SetState(state commonpb.SegmentState) SegmentInfoOption { return func(segment *SegmentInfo) { segment.State = state } } // SetIsImporting is the option to set import state for segment info. func SetIsImporting(isImporting bool) SegmentInfoOption { return func(segment *SegmentInfo) { segment.IsImporting = isImporting } } // SetDmlPosition is the option to set dml position for segment info func SetDmlPosition(pos *msgpb.MsgPosition) SegmentInfoOption { return func(segment *SegmentInfo) { segment.DmlPosition = pos } } // SetStartPosition is the option to set start position for segment info func SetStartPosition(pos *msgpb.MsgPosition) SegmentInfoOption { return func(segment *SegmentInfo) { segment.StartPosition = pos } } // SetAllocations is the option to set allocations for segment info func SetAllocations(allocations []*Allocation) SegmentInfoOption { return func(segment *SegmentInfo) { segment.allocations = allocations } } // AddAllocation is the option to add allocation info for segment info func AddAllocation(allocation *Allocation) SegmentInfoOption { return func(segment *SegmentInfo) { segment.allocations = append(segment.allocations, allocation) segment.LastExpireTime = allocation.ExpireTime } } // SetCurrentRows is the option to set current row count for segment info func SetCurrentRows(rows int64) SegmentInfoOption { return func(segment *SegmentInfo) { segment.currRows = rows segment.lastWrittenTime = time.Now() } } // SetBinlogs is the option to set binlogs for segment info func SetBinlogs(binlogs []*datapb.FieldBinlog) SegmentInfoOption { return func(segment *SegmentInfo) { segment.Binlogs = binlogs } } // SetFlushTime is the option to set flush time for segment info func SetFlushTime(t time.Time) SegmentInfoOption { return func(segment *SegmentInfo) { segment.lastFlushTime = t } } // SetIsCompacting is the option to set compaction state for segment info func SetIsCompacting(isCompacting bool) SegmentInfoOption { return func(segment *SegmentInfo) { segment.isCompacting = isCompacting } } func addSegmentBinlogs(field2Binlogs map[UniqueID][]*datapb.Binlog) SegmentInfoOption { return func(segment *SegmentInfo) { for fieldID, binlogPaths := range field2Binlogs { found := false for _, binlog := range segment.Binlogs { if binlog.FieldID != fieldID { continue } binlog.Binlogs = append(binlog.Binlogs, binlogPaths...) found = true break } if !found { // if no field matched segment.Binlogs = append(segment.Binlogs, &datapb.FieldBinlog{ FieldID: fieldID, Binlogs: binlogPaths, }) } } } } func (s *SegmentInfo) getSegmentSize() int64 { if s.size <= 0 { var size int64 for _, binlogs := range s.GetBinlogs() { for _, l := range binlogs.GetBinlogs() { size += l.GetLogSize() } } for _, deltaLogs := range s.GetDeltalogs() { for _, l := range deltaLogs.GetBinlogs() { size += l.GetLogSize() } } for _, statsLogs := range s.GetStatslogs() { for _, l := range statsLogs.GetBinlogs() { size += l.GetLogSize() } } s.size = size } return s.size } // SegmentInfoSelector is the function type to select SegmentInfo from meta type SegmentInfoSelector func(*SegmentInfo) bool