mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-04 21:09:06 +08:00
4939f82d4f
Seals the largest growing segment when the total size of the growing segments of a shard exceeds the size threshold (default 4GB). This policy helps keep the size of growing segments at a suitable level, alleviating the pressure on the delegator. issue: https://github.com/milvus-io/milvus/issues/34554 --------- Signed-off-by: bigsheeper <yihao.dai@zilliz.com>
519 lines
16 KiB
Go
519 lines
16 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package datacoord
|
|
|
|
import (
|
|
"time"
|
|
|
|
"github.com/golang/protobuf/proto"
|
|
"github.com/samber/lo"
|
|
"go.uber.org/atomic"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/msgpb"
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
)
|
|
|
|
// SegmentsInfo wraps a map, which maintains ID to SegmentInfo relation
// It also keeps secondary lookup tables (by collection and by channel) and
// the compaction relation between segments.
type SegmentsInfo struct {
	// segments is the primary table: segment ID -> SegmentInfo.
	segments map[UniqueID]*SegmentInfo
	// secondaryIndexes holds the same SegmentInfo pointers grouped by collection ID and by insert channel.
	secondaryIndexes segmentInfoIndexes
	compactionTo     map[UniqueID]UniqueID // map the compact relation, value is the segment which `CompactFrom` contains key.
	// A segment can be compacted to only one segment finally in meta.
}
|
|
|
|
// segmentInfoIndexes holds the secondary lookup tables of SegmentsInfo.
// Entries are added by addSecondaryIndex and removed by removeSecondaryIndex.
type segmentInfoIndexes struct {
	// coll2Segments maps collection ID -> segment ID -> SegmentInfo.
	coll2Segments map[UniqueID]map[UniqueID]*SegmentInfo
	// channel2Segments maps insert channel name -> segment ID -> SegmentInfo.
	channel2Segments map[string]map[UniqueID]*SegmentInfo
}
|
|
|
|
// SegmentInfo wraps datapb.SegmentInfo and patches some extra info on it
type SegmentInfo struct {
	*datapb.SegmentInfo
	// currRows starts as the proto's NumOfRows and is changed by the SetCurrentRows option.
	currRows int64
	// allocations is pre-allocated by NewSegmentInfo for growing segments only.
	allocations   []*Allocation
	lastFlushTime time.Time
	isCompacting  bool
	// a cache to avoid calculate twice
	size atomic.Int64
	// deltaRowcount caches the summed entry count of the delta logs;
	// a negative value means the cache has not been computed yet.
	deltaRowcount atomic.Int64
	// lastWrittenTime is refreshed whenever the SetCurrentRows option is applied.
	lastWrittenTime time.Time
}
|
|
|
|
// NewSegmentInfo create `SegmentInfo` wrapper from `datapb.SegmentInfo`
|
|
// assign current rows to last checkpoint and pre-allocate `allocations` slice
|
|
// Note that the allocation information is not preserved,
|
|
// the worst case scenario is to have a segment with twice size we expects
|
|
func NewSegmentInfo(info *datapb.SegmentInfo) *SegmentInfo {
|
|
s := &SegmentInfo{
|
|
SegmentInfo: info,
|
|
currRows: info.GetNumOfRows(),
|
|
}
|
|
// setup growing fields
|
|
if s.GetState() == commonpb.SegmentState_Growing {
|
|
s.allocations = make([]*Allocation, 0, 16)
|
|
s.lastFlushTime = time.Now().Add(-1 * paramtable.Get().DataCoordCfg.SegmentFlushInterval.GetAsDuration(time.Second))
|
|
// A growing segment from recovery can be also considered idle.
|
|
s.lastWrittenTime = getZeroTime()
|
|
}
|
|
// mark as uninitialized
|
|
s.deltaRowcount.Store(-1)
|
|
return s
|
|
}
|
|
|
|
// NewSegmentsInfo creates a `SegmentsInfo` instance, which makes sure internal map is initialized
|
|
// note that no mutex is wrapped so external concurrent control is needed
|
|
func NewSegmentsInfo() *SegmentsInfo {
|
|
return &SegmentsInfo{
|
|
segments: make(map[UniqueID]*SegmentInfo),
|
|
secondaryIndexes: segmentInfoIndexes{
|
|
coll2Segments: make(map[UniqueID]map[UniqueID]*SegmentInfo),
|
|
channel2Segments: make(map[string]map[UniqueID]*SegmentInfo),
|
|
},
|
|
compactionTo: make(map[UniqueID]UniqueID),
|
|
}
|
|
}
|
|
|
|
// GetSegment returns SegmentInfo
|
|
// the logPath in meta is empty
|
|
func (s *SegmentsInfo) GetSegment(segmentID UniqueID) *SegmentInfo {
|
|
segment, ok := s.segments[segmentID]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
return segment
|
|
}
|
|
|
|
// GetSegments iterates internal map and returns all SegmentInfo in a slice
|
|
// no deep copy applied
|
|
// the logPath in meta is empty
|
|
func (s *SegmentsInfo) GetSegments() []*SegmentInfo {
|
|
return lo.Values(s.segments)
|
|
}
|
|
|
|
func (s *SegmentsInfo) getCandidates(criterion *segmentCriterion) map[UniqueID]*SegmentInfo {
|
|
if criterion.collectionID > 0 {
|
|
collSegments, ok := s.secondaryIndexes.coll2Segments[criterion.collectionID]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
|
|
// both collection id and channel are filters of criterion
|
|
if criterion.channel != "" {
|
|
return lo.OmitBy(collSegments, func(k UniqueID, v *SegmentInfo) bool {
|
|
return v.InsertChannel != criterion.channel
|
|
})
|
|
}
|
|
return collSegments
|
|
}
|
|
|
|
if criterion.channel != "" {
|
|
channelSegments, ok := s.secondaryIndexes.channel2Segments[criterion.channel]
|
|
if !ok {
|
|
return nil
|
|
}
|
|
return channelSegments
|
|
}
|
|
|
|
return s.segments
|
|
}
|
|
|
|
func (s *SegmentsInfo) GetSegmentsBySelector(filters ...SegmentFilter) []*SegmentInfo {
|
|
criterion := &segmentCriterion{}
|
|
for _, filter := range filters {
|
|
filter.AddFilter(criterion)
|
|
}
|
|
|
|
// apply criterion
|
|
candidates := s.getCandidates(criterion)
|
|
var result []*SegmentInfo
|
|
for _, segment := range candidates {
|
|
if criterion.Match(segment) {
|
|
result = append(result, segment)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func (s *SegmentsInfo) GetRealSegmentsForChannel(channel string) []*SegmentInfo {
|
|
channelSegments := s.secondaryIndexes.channel2Segments[channel]
|
|
var result []*SegmentInfo
|
|
for _, segment := range channelSegments {
|
|
if !segment.GetIsFake() {
|
|
result = append(result, segment)
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
// GetCompactionTo returns the segment that the provided segment is compacted to.
|
|
// Return (nil, false) if given segmentID can not found in the meta.
|
|
// Return (nil, true) if given segmentID can be found not no compaction to.
|
|
// Return (notnil, true) if given segmentID can be found and has compaction to.
|
|
func (s *SegmentsInfo) GetCompactionTo(fromSegmentID int64) (*SegmentInfo, bool) {
|
|
if _, ok := s.segments[fromSegmentID]; !ok {
|
|
return nil, false
|
|
}
|
|
if toID, ok := s.compactionTo[fromSegmentID]; ok {
|
|
if to, ok := s.segments[toID]; ok {
|
|
return to, true
|
|
}
|
|
log.Warn("unreachable code: compactionTo relation is broken", zap.Int64("from", fromSegmentID), zap.Int64("to", toID))
|
|
}
|
|
return nil, true
|
|
}
|
|
|
|
// DropSegment deletes provided segmentID
|
|
// no extra method is taken when segmentID not exists
|
|
func (s *SegmentsInfo) DropSegment(segmentID UniqueID) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.deleteCompactTo(segment)
|
|
s.removeSecondaryIndex(segment)
|
|
delete(s.segments, segmentID)
|
|
}
|
|
}
|
|
|
|
// SetSegment sets SegmentInfo with segmentID, perform overwrite if already exists
|
|
// set the logPath of segment in meta empty, to save space
|
|
// if segment has logPath, make it empty
|
|
func (s *SegmentsInfo) SetSegment(segmentID UniqueID, segment *SegmentInfo) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
// Remove old segment compact to relation first.
|
|
s.deleteCompactTo(segment)
|
|
s.removeSecondaryIndex(segment)
|
|
}
|
|
s.segments[segmentID] = segment
|
|
s.addSecondaryIndex(segment)
|
|
s.addCompactTo(segment)
|
|
}
|
|
|
|
// SetRowCount sets rowCount info for SegmentInfo with provided segmentID
|
|
// if SegmentInfo not found, do nothing
|
|
func (s *SegmentsInfo) SetRowCount(segmentID UniqueID, rowCount int64) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.Clone(SetRowCount(rowCount))
|
|
}
|
|
}
|
|
|
|
// SetState sets Segment State info for SegmentInfo with provided segmentID
|
|
// if SegmentInfo not found, do nothing
|
|
func (s *SegmentsInfo) SetState(segmentID UniqueID, state commonpb.SegmentState) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.Clone(SetState(state))
|
|
}
|
|
}
|
|
|
|
// SetDmlPosition sets DmlPosition info (checkpoint for recovery) for SegmentInfo with provided segmentID
|
|
// if SegmentInfo not found, do nothing
|
|
func (s *SegmentsInfo) SetDmlPosition(segmentID UniqueID, pos *msgpb.MsgPosition) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.Clone(SetDmlPosition(pos))
|
|
}
|
|
}
|
|
|
|
// SetStartPosition sets StartPosition info (recovery info when no checkout point found) for SegmentInfo with provided segmentID
|
|
// if SegmentInfo not found, do nothing
|
|
func (s *SegmentsInfo) SetStartPosition(segmentID UniqueID, pos *msgpb.MsgPosition) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.Clone(SetStartPosition(pos))
|
|
}
|
|
}
|
|
|
|
// SetAllocations sets allocations for segment with specified id
|
|
// if the segment id is not found, do nothing
|
|
// uses `ShadowClone` since internal SegmentInfo is not changed
|
|
func (s *SegmentsInfo) SetAllocations(segmentID UniqueID, allocations []*Allocation) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.ShadowClone(SetAllocations(allocations))
|
|
}
|
|
}
|
|
|
|
// AddAllocation adds a new allocation to specified segment
|
|
// if the segment is not found, do nothing
|
|
// uses `Clone` since internal SegmentInfo's LastExpireTime is changed
|
|
func (s *SegmentsInfo) AddAllocation(segmentID UniqueID, allocation *Allocation) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.Clone(AddAllocation(allocation))
|
|
}
|
|
}
|
|
|
|
// SetCurrentRows sets rows count for segment
|
|
// if the segment is not found, do nothing
|
|
// uses `ShadowClone` since internal SegmentInfo is not changed
|
|
func (s *SegmentsInfo) SetCurrentRows(segmentID UniqueID, rows int64) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.ShadowClone(SetCurrentRows(rows))
|
|
}
|
|
}
|
|
|
|
// SetFlushTime sets flush time for segment
|
|
// if the segment is not found, do nothing
|
|
// uses `ShadowClone` since internal SegmentInfo is not changed
|
|
func (s *SegmentsInfo) SetFlushTime(segmentID UniqueID, t time.Time) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.ShadowClone(SetFlushTime(t))
|
|
}
|
|
}
|
|
|
|
// SetIsCompacting sets compaction status for segment
|
|
func (s *SegmentsInfo) SetIsCompacting(segmentID UniqueID, isCompacting bool) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.ShadowClone(SetIsCompacting(isCompacting))
|
|
}
|
|
}
|
|
|
|
func (s *SegmentInfo) IsDeltaLogExists(logID int64) bool {
|
|
for _, deltaLogs := range s.GetDeltalogs() {
|
|
for _, l := range deltaLogs.GetBinlogs() {
|
|
if l.GetLogID() == logID {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
func (s *SegmentInfo) IsStatsLogExists(logID int64) bool {
|
|
for _, statsLogs := range s.GetStatslogs() {
|
|
for _, l := range statsLogs.GetBinlogs() {
|
|
if l.GetLogID() == logID {
|
|
return true
|
|
}
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// SetLevel sets level for segment
|
|
func (s *SegmentsInfo) SetLevel(segmentID UniqueID, level datapb.SegmentLevel) {
|
|
if segment, ok := s.segments[segmentID]; ok {
|
|
s.segments[segmentID] = segment.ShadowClone(SetLevel(level))
|
|
}
|
|
}
|
|
|
|
// Clone deep clone the segment info and return a new instance
|
|
func (s *SegmentInfo) Clone(opts ...SegmentInfoOption) *SegmentInfo {
|
|
info := proto.Clone(s.SegmentInfo).(*datapb.SegmentInfo)
|
|
cloned := &SegmentInfo{
|
|
SegmentInfo: info,
|
|
currRows: s.currRows,
|
|
allocations: s.allocations,
|
|
lastFlushTime: s.lastFlushTime,
|
|
isCompacting: s.isCompacting,
|
|
// cannot copy size, since binlog may be changed
|
|
lastWrittenTime: s.lastWrittenTime,
|
|
}
|
|
for _, opt := range opts {
|
|
opt(cloned)
|
|
}
|
|
return cloned
|
|
}
|
|
|
|
// ShadowClone shadow clone the segment and return a new instance
|
|
func (s *SegmentInfo) ShadowClone(opts ...SegmentInfoOption) *SegmentInfo {
|
|
cloned := &SegmentInfo{
|
|
SegmentInfo: s.SegmentInfo,
|
|
currRows: s.currRows,
|
|
allocations: s.allocations,
|
|
lastFlushTime: s.lastFlushTime,
|
|
isCompacting: s.isCompacting,
|
|
lastWrittenTime: s.lastWrittenTime,
|
|
}
|
|
cloned.size.Store(s.size.Load())
|
|
cloned.deltaRowcount.Store(s.deltaRowcount.Load())
|
|
|
|
for _, opt := range opts {
|
|
opt(cloned)
|
|
}
|
|
return cloned
|
|
}
|
|
|
|
func (s *SegmentsInfo) addSecondaryIndex(segment *SegmentInfo) {
|
|
collID := segment.GetCollectionID()
|
|
channel := segment.GetInsertChannel()
|
|
if _, ok := s.secondaryIndexes.coll2Segments[collID]; !ok {
|
|
s.secondaryIndexes.coll2Segments[collID] = make(map[UniqueID]*SegmentInfo)
|
|
}
|
|
s.secondaryIndexes.coll2Segments[collID][segment.ID] = segment
|
|
|
|
if _, ok := s.secondaryIndexes.channel2Segments[channel]; !ok {
|
|
s.secondaryIndexes.channel2Segments[channel] = make(map[UniqueID]*SegmentInfo)
|
|
}
|
|
s.secondaryIndexes.channel2Segments[channel][segment.ID] = segment
|
|
}
|
|
|
|
func (s *SegmentsInfo) removeSecondaryIndex(segment *SegmentInfo) {
|
|
collID := segment.GetCollectionID()
|
|
channel := segment.GetInsertChannel()
|
|
if segments, ok := s.secondaryIndexes.coll2Segments[collID]; ok {
|
|
delete(segments, segment.ID)
|
|
if len(segments) == 0 {
|
|
delete(s.secondaryIndexes.coll2Segments, collID)
|
|
}
|
|
}
|
|
|
|
if segments, ok := s.secondaryIndexes.channel2Segments[channel]; ok {
|
|
delete(segments, segment.ID)
|
|
if len(segments) == 0 {
|
|
delete(s.secondaryIndexes.channel2Segments, channel)
|
|
}
|
|
}
|
|
}
|
|
|
|
// addCompactTo adds the compact relation to the segment
|
|
func (s *SegmentsInfo) addCompactTo(segment *SegmentInfo) {
|
|
for _, from := range segment.GetCompactionFrom() {
|
|
s.compactionTo[from] = segment.GetID()
|
|
}
|
|
}
|
|
|
|
// deleteCompactTo deletes the compact relation to the segment
|
|
func (s *SegmentsInfo) deleteCompactTo(segment *SegmentInfo) {
|
|
for _, from := range segment.GetCompactionFrom() {
|
|
delete(s.compactionTo, from)
|
|
}
|
|
}
|
|
|
|
// SegmentInfoOption is the option to set fields in segment info
// Options are passed to SegmentInfo.Clone or SegmentInfo.ShadowClone, which
// apply them, in order, to the freshly created copy.
type SegmentInfoOption func(segment *SegmentInfo)
|
|
|
|
// SetRowCount is the option to set row count for segment info
|
|
func SetRowCount(rowCount int64) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.NumOfRows = rowCount
|
|
}
|
|
}
|
|
|
|
// SetExpireTime is the option to set expire time for segment info
|
|
func SetExpireTime(expireTs Timestamp) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.LastExpireTime = expireTs
|
|
}
|
|
}
|
|
|
|
// SetState is the option to set state for segment info
|
|
func SetState(state commonpb.SegmentState) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.State = state
|
|
}
|
|
}
|
|
|
|
// SetDmlPosition is the option to set dml position for segment info
|
|
func SetDmlPosition(pos *msgpb.MsgPosition) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.DmlPosition = pos
|
|
}
|
|
}
|
|
|
|
// SetStartPosition is the option to set start position for segment info
|
|
func SetStartPosition(pos *msgpb.MsgPosition) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.StartPosition = pos
|
|
}
|
|
}
|
|
|
|
// SetAllocations is the option to set allocations for segment info
|
|
func SetAllocations(allocations []*Allocation) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.allocations = allocations
|
|
}
|
|
}
|
|
|
|
// AddAllocation is the option to add allocation info for segment info
|
|
func AddAllocation(allocation *Allocation) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.allocations = append(segment.allocations, allocation)
|
|
segment.LastExpireTime = allocation.ExpireTime
|
|
}
|
|
}
|
|
|
|
// SetCurrentRows is the option to set current row count for segment info
|
|
func SetCurrentRows(rows int64) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.currRows = rows
|
|
segment.lastWrittenTime = time.Now()
|
|
}
|
|
}
|
|
|
|
// SetFlushTime is the option to set flush time for segment info
|
|
func SetFlushTime(t time.Time) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.lastFlushTime = t
|
|
}
|
|
}
|
|
|
|
// SetIsCompacting is the option to set compaction state for segment info
|
|
func SetIsCompacting(isCompacting bool) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.isCompacting = isCompacting
|
|
}
|
|
}
|
|
|
|
// SetLevel is the option to set level for segment info
|
|
func SetLevel(level datapb.SegmentLevel) SegmentInfoOption {
|
|
return func(segment *SegmentInfo) {
|
|
segment.Level = level
|
|
}
|
|
}
|
|
|
|
func (s *SegmentInfo) getSegmentSize() int64 {
|
|
if s.size.Load() <= 0 || s.GetState() == commonpb.SegmentState_Growing {
|
|
var size int64
|
|
for _, binlogs := range s.GetBinlogs() {
|
|
for _, l := range binlogs.GetBinlogs() {
|
|
size += l.GetMemorySize()
|
|
}
|
|
}
|
|
|
|
for _, deltaLogs := range s.GetDeltalogs() {
|
|
for _, l := range deltaLogs.GetBinlogs() {
|
|
size += l.GetMemorySize()
|
|
}
|
|
}
|
|
|
|
for _, statsLogs := range s.GetStatslogs() {
|
|
for _, l := range statsLogs.GetBinlogs() {
|
|
size += l.GetMemorySize()
|
|
}
|
|
}
|
|
if size > 0 {
|
|
s.size.Store(size)
|
|
}
|
|
}
|
|
return s.size.Load()
|
|
}
|
|
|
|
func (s *SegmentInfo) getDeltaCount() int64 {
|
|
if s.deltaRowcount.Load() < 0 {
|
|
var rc int64
|
|
for _, deltaLogs := range s.GetDeltalogs() {
|
|
for _, l := range deltaLogs.GetBinlogs() {
|
|
rc += l.GetEntriesNum()
|
|
}
|
|
}
|
|
s.deltaRowcount.Store(rc)
|
|
}
|
|
r := s.deltaRowcount.Load()
|
|
return r
|
|
}
|
|
|
|
// SegmentInfoSelector is the function type to select SegmentInfo from meta
// It receives a segment and reports whether that segment is selected.
type SegmentInfoSelector func(*SegmentInfo) bool
|