milvus/internal/datacoord/compaction_policy_clustering.go
cai.zhang ac8c5fcd5d
enhance: Remove pre-marking segments as L2 during clustering compaction (#36799)
issue: #36686 

This PR removes pre-marking segments as L2 during clustering compaction
in version 2.5, and ensures compatibility with version 2.4.

The core of this change is to **ensure that the many-to-many lineage
derivation logic is correct, so that a parent and its child can never
exist simultaneously in the target segment view.**
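
For illustration, here is a minimal sketch of what that invariant amounts to. `assertNoLineageOverlap` is a hypothetical helper, not code from this PR; it only assumes the existing `GetCompactionFrom()` lineage getter on `datapb.SegmentInfo`:

```go
package datacoord

import (
	"fmt"

	"github.com/milvus-io/milvus/internal/proto/datapb"
)

// assertNoLineageOverlap (hypothetical) checks the invariant described above:
// no segment in a target view may coexist with any of its compaction parents.
func assertNoLineageOverlap(view []*datapb.SegmentInfo) error {
	inView := make(map[int64]struct{}, len(view))
	for _, seg := range view {
		inView[seg.GetID()] = struct{}{}
	}
	for _, seg := range view {
		// GetCompactionFrom lists the parent segments this segment was compacted from.
		for _, parentID := range seg.GetCompactionFrom() {
			if _, ok := inView[parentID]; ok {
				return fmt.Errorf("segment %d and its parent %d both exist in the target view", seg.GetID(), parentID)
			}
		}
	}
	return nil
}
```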

feature:
- Clustering compaction no longer marks the input segments as L2.
- Add a new field `is_invisible` to `segmentInfo`, and mark segments that
  have completed clustering but have not yet built indexes as `is_invisible`
  to prevent them from being loaded prematurely (see the sketch after this
  list).
- Do not mark the input segments as `Dropped` before the clustering
  compaction is completed.
- After a compaction fails, only the result segments need to be marked as
  `Dropped`.
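
The trigger-side effect of `is_invisible` shows up in the candidate filter later in this file; the sketch below restates it in isolation, assuming only the generated getters on `datapb.SegmentInfo` (including the new `GetIsInvisible()`):

```go
package datacoord

import (
	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus/internal/proto/datapb"
)

// isClusteringCandidate sketches the candidate filter: a segment is only
// considered once it is flushed, is not an L0 segment, is not importing,
// and, new in this PR, is not invisible (i.e. its post-clustering index
// build has finished).
func isClusteringCandidate(seg *datapb.SegmentInfo) bool {
	return seg.GetState() == commonpb.SegmentState_Flushed &&
		seg.GetLevel() != datapb.SegmentLevel_L0 &&
		!seg.GetIsImporting() &&
		!seg.GetIsInvisible()
}
```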

compatibility:
- If the upgraded task has not failed, there are no compatibility issues.
- If the status after the upgrade is `MetaSaved`, skip the stats task based
  on whether TmpSegments is empty.
- If the failure occurred before `MetaSaved`:
  - There are no ResultSegments, and the InputSegments have not been marked
    as Dropped yet.
  - The level of the input segments needs to be reverted to LastLevel.
- If the failure occurred after `MetaSaved`:
  - ResultSegments have already been generated and InputSegments have been
    marked as Dropped. At this point, simply make the ResultSegments
    visible.
  - The level of the ResultSegments needs to be set to L1 (so that they can
    participate in mixCompaction); a sketch of this recovery split follows
    the list.
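
A minimal sketch of that recovery split, assuming the `GetInputSegments()` and `GetResultSegments()` getters on `datapb.CompactionTask`; `markSegmentVisible`, `setSegmentLevel`, and `lastLevelOf` are hypothetical stand-ins for the real meta updates in the compaction task state machine:

```go
package datacoord

import "github.com/milvus-io/milvus/internal/proto/datapb"

// Hypothetical stand-ins for the real meta-update operations.
func markSegmentVisible(segID int64)                         {}
func setSegmentLevel(segID int64, level datapb.SegmentLevel) {}
func lastLevelOf(segID int64) datapb.SegmentLevel            { return datapb.SegmentLevel_L1 }

// recoverFailedClusteringTask sketches the compatibility handling above.
func recoverFailedClusteringTask(task *datapb.CompactionTask) {
	if len(task.GetResultSegments()) > 0 {
		// Failed after MetaSaved: the inputs are already Dropped, so promote
		// the results instead: make them visible and set them to L1 so they
		// can participate in future mix compactions.
		for _, segID := range task.GetResultSegments() {
			markSegmentVisible(segID)
			setSegmentLevel(segID, datapb.SegmentLevel_L1)
		}
		return
	}
	// Failed before MetaSaved: no result segments exist and the inputs were
	// never dropped, so only their level has to be reverted.
	for _, segID := range task.GetInputSegments() {
		setSegmentLevel(segID, lastLevelOf(segID))
	}
}
```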

---------

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
2024-10-23 17:15:28 +08:00

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datacoord

import (
	"context"
	"fmt"
	"time"

	"github.com/samber/lo"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/datacoord/allocator"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/util/clustering"
	"github.com/milvus-io/milvus/pkg/log"
	"github.com/milvus-io/milvus/pkg/util/paramtable"
)

type clusteringCompactionPolicy struct {
	meta      *meta
	allocator allocator.Allocator
	handler   Handler
}

func newClusteringCompactionPolicy(meta *meta, allocator allocator.Allocator, handler Handler) *clusteringCompactionPolicy {
	return &clusteringCompactionPolicy{meta: meta, allocator: allocator, handler: handler}
}

func (policy *clusteringCompactionPolicy) Enable() bool {
	return Params.DataCoordCfg.EnableAutoCompaction.GetAsBool() &&
		Params.DataCoordCfg.ClusteringCompactionEnable.GetAsBool() &&
		Params.DataCoordCfg.ClusteringCompactionAutoEnable.GetAsBool()
}

func (policy *clusteringCompactionPolicy) Trigger() (map[CompactionTriggerType][]CompactionView, error) {
	log.Info("start trigger clusteringCompactionPolicy...")
	ctx := context.Background()
	collections := policy.meta.GetCollections()
	events := make(map[CompactionTriggerType][]CompactionView, 0)
	views := make([]CompactionView, 0)
	for _, collection := range collections {
		collectionViews, _, err := policy.triggerOneCollection(ctx, collection.ID, false)
		if err != nil {
			// do not propagate this error: one collection failing should not fail the whole trigger
			log.Warn("fail to trigger collection clustering compaction", zap.Int64("collectionID", collection.ID), zap.Error(err))
		}
		views = append(views, collectionViews...)
	}
	events[TriggerTypeClustering] = views
	return events, nil
}

// todo: remove this check once partial clustering compaction is supported
func (policy *clusteringCompactionPolicy) checkAllL2SegmentsContains(ctx context.Context, collectionID, partitionID int64, channel string) bool {
	getCompactingL2Segment := func(segment *SegmentInfo) bool {
		return segment.CollectionID == collectionID &&
			segment.PartitionID == partitionID &&
			segment.InsertChannel == channel &&
			isSegmentHealthy(segment) &&
			segment.GetLevel() == datapb.SegmentLevel_L2 &&
			segment.isCompacting
	}
	segments := policy.meta.SelectSegments(SegmentFilterFunc(getCompactingL2Segment))
	if len(segments) > 0 {
		log.Ctx(ctx).Info("some L2 segments are still compacting",
			zap.Int64("collectionID", collectionID), zap.Int64("partitionID", partitionID),
			zap.String("channel", channel), zap.Int64s("compacting segments", lo.Map(segments, func(segment *SegmentInfo, i int) int64 {
				return segment.GetID()
			})))
		return false
	}
	return true
}

func (policy *clusteringCompactionPolicy) triggerOneCollection(ctx context.Context, collectionID int64, manual bool) ([]CompactionView, int64, error) {
	log := log.With(zap.Int64("collectionID", collectionID))
	log.Info("start trigger collection clustering compaction")
	collection, err := policy.handler.GetCollection(ctx, collectionID)
	if err != nil {
		log.Warn("fail to get collection from handler")
		return nil, 0, err
	}
	if collection == nil {
		log.Warn("collection does not exist")
		return nil, 0, nil
	}
	clusteringKeyField := clustering.GetClusteringKeyField(collection.Schema)
	if clusteringKeyField == nil {
		log.Info("the collection has no clustering key, skip triggering clustering compaction")
		return nil, 0, nil
	}
	compacting, triggerID := policy.collectionIsClusteringCompacting(collection.ID)
	if compacting {
		log.Info("collection is clustering compacting", zap.Int64("triggerID", triggerID))
		return nil, triggerID, nil
	}
	newTriggerID, err := policy.allocator.AllocID(ctx)
	if err != nil {
		log.Warn("fail to allocate triggerID", zap.Error(err))
		return nil, 0, err
	}
	partSegments := policy.meta.GetSegmentsChanPart(func(segment *SegmentInfo) bool {
		return segment.CollectionID == collectionID &&
			isSegmentHealthy(segment) &&
			isFlush(segment) &&
			!segment.isCompacting && // not compacting now
			!segment.GetIsImporting() && // not importing now
			segment.GetLevel() != datapb.SegmentLevel_L0 && // ignore level zero segments
			!segment.GetIsInvisible()
	})
	views := make([]CompactionView, 0)
	// partSegments is a list of chanPartSegments, i.e. segments grouped by channel and partition
	for _, group := range partSegments {
		log := log.With(zap.Int64("partitionID", group.partitionID), zap.String("channel", group.channelName))
		if !policy.checkAllL2SegmentsContains(ctx, group.collectionID, group.partitionID, group.channelName) {
			log.Warn("skip clustering compaction, otherwise the performance would degrade")
			continue
		}
		collectionTTL, err := getCollectionTTL(collection.Properties)
		if err != nil {
			log.Warn("get collection ttl failed, skip handling compaction")
			return make([]CompactionView, 0), 0, err
		}
		if len(group.segments) == 0 {
			log.Info("the length of SegmentsChanPart is 0, skip handling compaction")
			continue
		}
		if !manual {
			execute, err := triggerClusteringCompactionPolicy(ctx, policy.meta, group.collectionID, group.partitionID, group.channelName, group.segments)
			if err != nil {
				log.Warn("failed to trigger clustering compaction", zap.Error(err))
				continue
			}
			if !execute {
				continue
			}
		}
		segmentViews := GetViewsByInfo(group.segments...)
		view := &ClusteringSegmentsView{
			label:              segmentViews[0].label,
			segments:           segmentViews,
			clusteringKeyField: clusteringKeyField,
			collectionTTL:      collectionTTL,
			triggerID:          newTriggerID,
		}
		views = append(views, view)
	}
	log.Info("finish trigger collection clustering compaction", zap.Int("viewNum", len(views)))
	return views, newTriggerID, nil
}

func (policy *clusteringCompactionPolicy) collectionIsClusteringCompacting(collectionID UniqueID) (bool, int64) {
	triggers := policy.meta.compactionTaskMeta.GetCompactionTasksByCollection(collectionID)
	if len(triggers) == 0 {
		return false, 0
	}
	var latestTriggerID int64 = 0
	for triggerID := range triggers {
		if triggerID > latestTriggerID {
			latestTriggerID = triggerID
		}
	}
	tasks := triggers[latestTriggerID]
	if len(tasks) > 0 {
		cTasks := tasks
		summary := summaryCompactionState(cTasks)
		return summary.state == commonpb.CompactionState_Executing, cTasks[0].TriggerID
	}
	return false, 0
}

func calculateClusteringCompactionConfig(coll *collectionInfo, view CompactionView, expectedSegmentSize int64) (totalRows, maxSegmentRows, preferSegmentRows int64, err error) {
	for _, s := range view.GetSegmentsView() {
		totalRows += s.NumOfRows
	}
	clusteringMaxSegmentSizeRatio := paramtable.Get().DataCoordCfg.ClusteringCompactionMaxSegmentSizeRatio.GetAsFloat()
	clusteringPreferSegmentSizeRatio := paramtable.Get().DataCoordCfg.ClusteringCompactionPreferSegmentSizeRatio.GetAsFloat()
	maxRows, err := calBySegmentSizePolicy(coll.Schema, expectedSegmentSize)
	if err != nil {
		return 0, 0, 0, err
	}
	maxSegmentRows = int64(float64(maxRows) * clusteringMaxSegmentSizeRatio)
	preferSegmentRows = int64(float64(maxRows) * clusteringPreferSegmentSizeRatio)
	return
}

func triggerClusteringCompactionPolicy(ctx context.Context, meta *meta, collectionID int64, partitionID int64, channel string, segments []*SegmentInfo) (bool, error) {
	log := log.With(zap.Int64("collectionID", collectionID), zap.Int64("partitionID", partitionID))
	currentVersion := meta.partitionStatsMeta.GetCurrentPartitionStatsVersion(collectionID, partitionID, channel)
	if currentVersion == 0 {
		var newDataSize int64 = 0
		for _, seg := range segments {
			newDataSize += seg.getSegmentSize()
		}
		if newDataSize > Params.DataCoordCfg.ClusteringCompactionNewDataSizeThreshold.GetAsSize() {
			log.Info("New data is larger than threshold, do compaction", zap.Int64("newDataSize", newDataSize))
			return true, nil
		}
		log.Info("No partition stats and not enough new data, skip compaction", zap.Int64("newDataSize", newDataSize))
		return false, nil
	}
	partitionStats := meta.GetPartitionStatsMeta().GetPartitionStats(collectionID, partitionID, channel, currentVersion)
	if partitionStats == nil {
		log.Info("partition stats not found")
		return false, nil
	}
	timestampSeconds := partitionStats.GetCommitTime()
	pTime := time.Unix(timestampSeconds, 0)
	if time.Since(pTime) < Params.DataCoordCfg.ClusteringCompactionMinInterval.GetAsDuration(time.Second) {
		log.Info("Too soon after the last clustering compaction, skip compaction")
		return false, nil
	}
	if time.Since(pTime) > Params.DataCoordCfg.ClusteringCompactionMaxInterval.GetAsDuration(time.Second) {
		log.Info("It has been a long time since the last clustering compaction, do compaction")
		return true, nil
	}
	var compactedSegmentSize int64 = 0
	var uncompactedSegmentSize int64 = 0
	for _, seg := range segments {
		if lo.Contains(partitionStats.SegmentIDs, seg.ID) {
			compactedSegmentSize += seg.getSegmentSize()
		} else {
			uncompactedSegmentSize += seg.getSegmentSize()
		}
	}
	// size based
	if uncompactedSegmentSize > Params.DataCoordCfg.ClusteringCompactionNewDataSizeThreshold.GetAsSize() {
		log.Info("New data is larger than threshold, do compaction", zap.Int64("newDataSize", uncompactedSegmentSize))
		return true, nil
	}
	log.Info("New data is smaller than threshold, skip compaction", zap.Int64("newDataSize", uncompactedSegmentSize))
	return false, nil
}

var _ CompactionView = (*ClusteringSegmentsView)(nil)

type ClusteringSegmentsView struct {
	label              *CompactionGroupLabel
	segments           []*SegmentView
	clusteringKeyField *schemapb.FieldSchema
	collectionTTL      time.Duration
	triggerID          int64
}

func (v *ClusteringSegmentsView) GetGroupLabel() *CompactionGroupLabel {
	if v == nil {
		return &CompactionGroupLabel{}
	}
	return v.label
}

func (v *ClusteringSegmentsView) GetSegmentsView() []*SegmentView {
	if v == nil {
		return nil
	}
	return v.segments
}

func (v *ClusteringSegmentsView) Append(segments ...*SegmentView) {
	if v.segments == nil {
		v.segments = segments
		return
	}
	v.segments = append(v.segments, segments...)
}

func (v *ClusteringSegmentsView) String() string {
	strs := lo.Map(v.segments, func(segView *SegmentView, _ int) string {
		return segView.String()
	})
	return fmt.Sprintf("label=<%s>, segments=%v", v.label.String(), strs)
}

func (v *ClusteringSegmentsView) Trigger() (CompactionView, string) {
	return v, ""
}

func (v *ClusteringSegmentsView) ForceTrigger() (CompactionView, string) {
	panic("implement me")
}