Mirror of https://gitee.com/milvus-io/milvus.git, synced 2024-12-02 11:59:00 +08:00
ac8c5fcd5d
issue: #36686

This PR removes pre-marking segments as L2 during clustering compaction in version 2.5, and ensures compatibility with version 2.4. The core of this change is to **ensure that the many-to-many lineage derivation logic is correct, so that a parent and its child can never exist simultaneously in the target segment view.**

feature:
- Clustering compaction no longer marks the input segments as L2.
- Add a new field `is_invisible` to `segmentInfo`, and mark segments that have completed clustering but have not yet built indexes as `is_invisible`, to prevent them from being loaded prematurely.
- Do not mark the input segments as `Dropped` before the clustering compaction is completed.
- After a compaction fails, only the result segments need to be marked as `Dropped`.

compatibility:
- If the upgraded task has not failed, there are no compatibility issues.
- If the status after the upgrade is `MetaSaved`, skip the stats task depending on whether TmpSegments is empty.
- If the failure occurs before `MetaSaved`:
  - there are no ResultSegments, and the InputSegments have not been marked as Dropped yet;
  - the level of the input segments needs to be reverted to LastLevel.
- If the failure occurs after `MetaSaved`:
  - ResultSegments have already been generated, and InputSegments have been marked as Dropped; at this point, simply make the ResultSegments visible;
  - the level of the ResultSegments needs to be set to L1 (so that they can participate in mixCompaction).

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
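Below is a minimal sketch of the two failure-recovery branches described above, using hypothetical types and helpers (`taskState`, `clusteringTask`, `repairTask` are illustrative, not the actual datacoord API; the real logic lives in datacoord's compaction task meta):

```go
package main

import "fmt"

// taskState is a simplified stand-in for the clustering task's state machine.
type taskState int

const (
	stateHealthy               taskState = iota // task never failed
	stateFailedBeforeMetaSaved                  // failed before results were persisted
	stateFailedAfterMetaSaved                   // failed after results were persisted
)

type clusteringTask struct {
	state          taskState
	inputSegments  []int64
	resultSegments []int64
}

// repairTask applies the compatibility rules above to a task recovered after upgrade.
func repairTask(t clusteringTask) {
	switch t.state {
	case stateFailedBeforeMetaSaved:
		// No ResultSegments exist and InputSegments were never dropped,
		// so only the input segments' level must be reverted to LastLevel.
		for _, seg := range t.inputSegments {
			fmt.Printf("segment %d: revert level to LastLevel\n", seg)
		}
	case stateFailedAfterMetaSaved:
		// InputSegments are already Dropped; make the ResultSegments
		// visible and set them to L1 so mixCompaction can pick them up.
		for _, seg := range t.resultSegments {
			fmt.Printf("segment %d: make visible, set level to L1\n", seg)
		}
	default:
		// No failure: nothing to repair.
	}
}

func main() {
	repairTask(clusteringTask{state: stateFailedBeforeMetaSaved, inputSegments: []int64{1, 2}})
	repairTask(clusteringTask{state: stateFailedAfterMetaSaved, resultSegments: []int64{3}})
}
```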
314 lines · 11 KiB · Go
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datacoord

import (
	"context"
	"fmt"
	"time"

	"github.com/samber/lo"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/datacoord/allocator"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/util/clustering"
	"github.com/milvus-io/milvus/pkg/log"
	"github.com/milvus-io/milvus/pkg/util/paramtable"
)

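// clusteringCompactionPolicy generates clustering compaction views for
// collections that define a clustering key.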
type clusteringCompactionPolicy struct {
	meta      *meta
	allocator allocator.Allocator
	handler   Handler
}

func newClusteringCompactionPolicy(meta *meta, allocator allocator.Allocator, handler Handler) *clusteringCompactionPolicy {
	return &clusteringCompactionPolicy{meta: meta, allocator: allocator, handler: handler}
}

func (policy *clusteringCompactionPolicy) Enable() bool {
	return Params.DataCoordCfg.EnableAutoCompaction.GetAsBool() &&
		Params.DataCoordCfg.ClusteringCompactionEnable.GetAsBool() &&
		Params.DataCoordCfg.ClusteringCompactionAutoEnable.GetAsBool()
}

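// Trigger scans every collection and collects clustering compaction views for
// each; all views are returned under a single TriggerTypeClustering event.
// A failure on one collection is logged and skipped rather than aborting the scan.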
func (policy *clusteringCompactionPolicy) Trigger() (map[CompactionTriggerType][]CompactionView, error) {
	log.Info("start trigger clusteringCompactionPolicy...")
	ctx := context.Background()
	collections := policy.meta.GetCollections()

	events := make(map[CompactionTriggerType][]CompactionView, 0)
	views := make([]CompactionView, 0)
	for _, collection := range collections {
		collectionViews, _, err := policy.triggerOneCollection(ctx, collection.ID, false)
		if err != nil {
			// do not return this error: a single failing collection should not fail the whole trigger
			log.Warn("fail to trigger collection clustering compaction", zap.Int64("collectionID", collection.ID), zap.Error(err))
		}
		views = append(views, collectionViews...)
	}
	events[TriggerTypeClustering] = views
	return events, nil
}

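// checkAllL2SegmentsContains reports whether no L2 segment of the given
// collection/partition/channel is currently compacting; clustering compaction
// is only allowed once all of them are idle.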
// TODO: remove this check after partial clustering compaction is supported
func (policy *clusteringCompactionPolicy) checkAllL2SegmentsContains(ctx context.Context, collectionID, partitionID int64, channel string) bool {
	getCompactingL2Segment := func(segment *SegmentInfo) bool {
		return segment.CollectionID == collectionID &&
			segment.PartitionID == partitionID &&
			segment.InsertChannel == channel &&
			isSegmentHealthy(segment) &&
			segment.GetLevel() == datapb.SegmentLevel_L2 &&
			segment.isCompacting
	}
	segments := policy.meta.SelectSegments(SegmentFilterFunc(getCompactingL2Segment))
	if len(segments) > 0 {
		log.Ctx(ctx).Info("some L2 segments are still compacting",
			zap.Int64("collectionID", collectionID), zap.Int64("partitionID", partitionID),
			zap.String("channel", channel), zap.Int64s("compacting segments", lo.Map(segments, func(segment *SegmentInfo, i int) int64 {
				return segment.GetID()
			})))
		return false
	}
	return true
}

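// triggerOneCollection builds clustering compaction views for one collection.
// It bails out early when the collection has no clustering key or is already
// clustering compacting; otherwise it groups eligible segments by channel and
// partition and emits one view per group. Unless manual is set,
// triggerClusteringCompactionPolicy decides per group whether to compact.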
func (policy *clusteringCompactionPolicy) triggerOneCollection(ctx context.Context, collectionID int64, manual bool) ([]CompactionView, int64, error) {
	log := log.With(zap.Int64("collectionID", collectionID))
	log.Info("start trigger collection clustering compaction")
	collection, err := policy.handler.GetCollection(ctx, collectionID)
	if err != nil {
		log.Warn("fail to get collection from handler")
		return nil, 0, err
	}
	if collection == nil {
		log.Warn("collection not exist")
		return nil, 0, nil
	}
	clusteringKeyField := clustering.GetClusteringKeyField(collection.Schema)
	if clusteringKeyField == nil {
		log.Info("the collection has no clustering key, skip trigger clustering compaction")
		return nil, 0, nil
	}

	compacting, triggerID := policy.collectionIsClusteringCompacting(collection.ID)
	if compacting {
		log.Info("collection is clustering compacting", zap.Int64("triggerID", triggerID))
		return nil, triggerID, nil
	}

	newTriggerID, err := policy.allocator.AllocID(ctx)
	if err != nil {
		log.Warn("fail to allocate triggerID", zap.Error(err))
		return nil, 0, err
	}

	partSegments := policy.meta.GetSegmentsChanPart(func(segment *SegmentInfo) bool {
		return segment.CollectionID == collectionID &&
			isSegmentHealthy(segment) &&
			isFlush(segment) &&
			!segment.isCompacting && // not compacting now
			!segment.GetIsImporting() && // not importing now
			segment.GetLevel() != datapb.SegmentLevel_L0 && // ignore level zero segments
			!segment.GetIsInvisible()
	})

	views := make([]CompactionView, 0)
	// partSegments is a list of chanPartSegments, i.e. segments grouped by channel and partition
	for _, group := range partSegments {
		log := log.With(zap.Int64("partitionID", group.partitionID), zap.String("channel", group.channelName))

		if !policy.checkAllL2SegmentsContains(ctx, group.collectionID, group.partitionID, group.channelName) {
			log.Warn("skip clustering compaction: some L2 segments are still compacting, running now would degrade performance")
			continue
		}

		collectionTTL, err := getCollectionTTL(collection.Properties)
		if err != nil {
			log.Warn("get collection ttl failed, skip to handle compaction")
			return make([]CompactionView, 0), 0, err
		}

		if len(group.segments) == 0 {
			log.Info("the length of SegmentsChanPart is 0, skip to handle compaction")
			continue
		}

		if !manual {
			execute, err := triggerClusteringCompactionPolicy(ctx, policy.meta, group.collectionID, group.partitionID, group.channelName, group.segments)
			if err != nil {
				log.Warn("failed to trigger clustering compaction", zap.Error(err))
				continue
			}
			if !execute {
				continue
			}
		}

		segmentViews := GetViewsByInfo(group.segments...)
		view := &ClusteringSegmentsView{
			label:              segmentViews[0].label,
			segments:           segmentViews,
			clusteringKeyField: clusteringKeyField,
			collectionTTL:      collectionTTL,
			triggerID:          newTriggerID,
		}
		views = append(views, view)
	}

	log.Info("finish trigger collection clustering compaction", zap.Int("viewNum", len(views)))
	return views, newTriggerID, nil
}

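// collectionIsClusteringCompacting reports whether the most recent clustering
// compaction trigger of the collection is still executing, together with the
// ID of that trigger.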
func (policy *clusteringCompactionPolicy) collectionIsClusteringCompacting(collectionID UniqueID) (bool, int64) {
	triggers := policy.meta.compactionTaskMeta.GetCompactionTasksByCollection(collectionID)
	if len(triggers) == 0 {
		return false, 0
	}
	var latestTriggerID int64 = 0
	for triggerID := range triggers {
		if triggerID > latestTriggerID {
			latestTriggerID = triggerID
		}
	}
	tasks := triggers[latestTriggerID]
	if len(tasks) > 0 {
		cTasks := tasks
		summary := summaryCompactionState(cTasks)
		return summary.state == commonpb.CompactionState_Executing, cTasks[0].TriggerID
	}
	return false, 0
}

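// calculateClusteringCompactionConfig derives the row limits for the output
// segments: maxRows follows from the expected segment size for the schema,
// and maxSegmentRows/preferSegmentRows scale it by the configured ratios.
// With illustrative numbers, maxRows = 1,000,000, a max ratio of 2.0 and a
// prefer ratio of 1.0 yield limits of 2,000,000 and 1,000,000 rows.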
func calculateClusteringCompactionConfig(coll *collectionInfo, view CompactionView, expectedSegmentSize int64) (totalRows, maxSegmentRows, preferSegmentRows int64, err error) {
	for _, s := range view.GetSegmentsView() {
		totalRows += s.NumOfRows
	}
	clusteringMaxSegmentSizeRatio := paramtable.Get().DataCoordCfg.ClusteringCompactionMaxSegmentSizeRatio.GetAsFloat()
	clusteringPreferSegmentSizeRatio := paramtable.Get().DataCoordCfg.ClusteringCompactionPreferSegmentSizeRatio.GetAsFloat()

	maxRows, err := calBySegmentSizePolicy(coll.Schema, expectedSegmentSize)
	if err != nil {
		return 0, 0, 0, err
	}
	maxSegmentRows = int64(float64(maxRows) * clusteringMaxSegmentSizeRatio)
	preferSegmentRows = int64(float64(maxRows) * clusteringPreferSegmentSizeRatio)
	return
}

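// triggerClusteringCompactionPolicy decides whether a channel/partition group
// should be clustering compacted:
//   - no partition stats yet: compact only when the new data size exceeds
//     ClusteringCompactionNewDataSizeThreshold;
//   - less than ClusteringCompactionMinInterval since the last compaction: skip;
//   - more than ClusteringCompactionMaxInterval since the last compaction: compact;
//   - otherwise: compact once the uncompacted data size exceeds the threshold.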
func triggerClusteringCompactionPolicy(ctx context.Context, meta *meta, collectionID int64, partitionID int64, channel string, segments []*SegmentInfo) (bool, error) {
	log := log.With(zap.Int64("collectionID", collectionID), zap.Int64("partitionID", partitionID))
	currentVersion := meta.partitionStatsMeta.GetCurrentPartitionStatsVersion(collectionID, partitionID, channel)
	if currentVersion == 0 {
		var newDataSize int64 = 0
		for _, seg := range segments {
			newDataSize += seg.getSegmentSize()
		}
		if newDataSize > Params.DataCoordCfg.ClusteringCompactionNewDataSizeThreshold.GetAsSize() {
			log.Info("New data is larger than threshold, do compaction", zap.Int64("newDataSize", newDataSize))
			return true, nil
		}
		log.Info("No partition stats and not enough new data, skip compaction", zap.Int64("newDataSize", newDataSize))
		return false, nil
	}

	partitionStats := meta.GetPartitionStatsMeta().GetPartitionStats(collectionID, partitionID, channel, currentVersion)
	if partitionStats == nil {
		log.Info("partition stats not found")
		return false, nil
	}
	timestampSeconds := partitionStats.GetCommitTime()
	pTime := time.Unix(timestampSeconds, 0)
	if time.Since(pTime) < Params.DataCoordCfg.ClusteringCompactionMinInterval.GetAsDuration(time.Second) {
		log.Info("Not enough time has passed since the last clustering compaction, skip compaction")
		return false, nil
	}
	if time.Since(pTime) > Params.DataCoordCfg.ClusteringCompactionMaxInterval.GetAsDuration(time.Second) {
		log.Info("A long time has passed since the last clustering compaction, do compaction")
		return true, nil
	}

	var compactedSegmentSize int64 = 0
	var uncompactedSegmentSize int64 = 0
	for _, seg := range segments {
		if lo.Contains(partitionStats.SegmentIDs, seg.ID) {
			compactedSegmentSize += seg.getSegmentSize()
		} else {
			uncompactedSegmentSize += seg.getSegmentSize()
		}
	}

	// size-based check
	if uncompactedSegmentSize > Params.DataCoordCfg.ClusteringCompactionNewDataSizeThreshold.GetAsSize() {
		log.Info("New data is larger than threshold, do compaction", zap.Int64("newDataSize", uncompactedSegmentSize))
		return true, nil
	}
	log.Info("New data is smaller than threshold, skip compaction", zap.Int64("newDataSize", uncompactedSegmentSize))
	return false, nil
}

var _ CompactionView = (*ClusteringSegmentsView)(nil)

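// ClusteringSegmentsView is the CompactionView implementation used by
// clustering compaction: one group of segment views sharing a label, plus the
// clustering key field, collection TTL, and the trigger that produced it.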
type ClusteringSegmentsView struct {
	label              *CompactionGroupLabel
	segments           []*SegmentView
	clusteringKeyField *schemapb.FieldSchema
	collectionTTL      time.Duration
	triggerID          int64
}

func (v *ClusteringSegmentsView) GetGroupLabel() *CompactionGroupLabel {
	if v == nil {
		return &CompactionGroupLabel{}
	}
	return v.label
}

func (v *ClusteringSegmentsView) GetSegmentsView() []*SegmentView {
	if v == nil {
		return nil
	}
	return v.segments
}

func (v *ClusteringSegmentsView) Append(segments ...*SegmentView) {
	if v.segments == nil {
		v.segments = segments
		return
	}

	v.segments = append(v.segments, segments...)
}

func (v *ClusteringSegmentsView) String() string {
	strs := lo.Map(v.segments, func(segView *SegmentView, _ int) string {
		return segView.String()
	})
	return fmt.Sprintf("label=<%s>, segments=%v", v.label.String(), strs)
}

func (v *ClusteringSegmentsView) Trigger() (CompactionView, string) {
	return v, ""
}

func (v *ClusteringSegmentsView) ForceTrigger() (CompactionView, string) {
	panic("implement me")
}
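
// Illustrative usage only (the actual wiring lives in datacoord's compaction
// trigger manager, not in this file): a caller constructs the policy and,
// while Enable() reports true, periodically calls Trigger() and hands the
// returned views to the compaction pipeline.
//
//	policy := newClusteringCompactionPolicy(meta, alloc, handler)
//	if policy.Enable() {
//		events, err := policy.Trigger()
//		if err == nil {
//			for _, view := range events[TriggerTypeClustering] {
//				submitView(view) // submitView is hypothetical
//			}
//		}
//	}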