diff --git a/internal/datacoord/compaction_task_l0.go b/internal/datacoord/compaction_task_l0.go
index bab3618b2b..2af08b5317 100644
--- a/internal/datacoord/compaction_task_l0.go
+++ b/internal/datacoord/compaction_task_l0.go
@@ -28,6 +28,7 @@ import (
 
 	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
+	"github.com/milvus-io/milvus/pkg/common"
 	"github.com/milvus-io/milvus/pkg/log"
 	"github.com/milvus-io/milvus/pkg/util/merr"
 )
@@ -65,56 +66,69 @@ func (t *l0CompactionTask) processPipelining() bool {
 	if t.NeedReAssignNodeID() {
 		return false
 	}
+
+	log := log.With(zap.Int64("triggerID", t.GetTriggerID()), zap.Int64("nodeID", t.GetNodeID()))
 	var err error
 	t.plan, err = t.BuildCompactionRequest()
 	if err != nil {
-		err2 := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed), setFailReason(err.Error()))
-		return err2 == nil
+		log.Warn("l0CompactionTask failed to build compaction request", zap.Error(err))
+		err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed), setFailReason(err.Error()))
+		if err != nil {
+			log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
+			return false
+		}
+
+		return t.processFailed()
 	}
-	err = t.sessions.Compaction(context.Background(), t.GetNodeID(), t.GetPlan())
+
+	err = t.sessions.Compaction(context.TODO(), t.GetNodeID(), t.GetPlan())
 	if err != nil {
-		log.Warn("Failed to notify compaction tasks to DataNode", zap.Error(err))
+		log.Warn("l0CompactionTask failed to notify compaction tasks to DataNode", zap.Int64("planID", t.GetPlanID()), zap.Error(err))
 		t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
 		return false
 	}
+
 	t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_executing))
 	return false
 }
 
 func (t *l0CompactionTask) processExecuting() bool {
+	log := log.With(zap.Int64("planID", t.GetPlanID()), zap.Int64("nodeID", t.GetNodeID()))
 	result, err := t.sessions.GetCompactionPlanResult(t.GetNodeID(), t.GetPlanID())
 	if err != nil || result == nil {
 		if errors.Is(err, merr.ErrNodeNotFound) {
 			t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
 		}
+		log.Warn("l0CompactionTask failed to get compaction result", zap.Error(err))
 		return false
 	}
 	switch result.GetState() {
 	case datapb.CompactionTaskState_executing:
 		if t.checkTimeout() {
 			err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_timeout))
-			if err == nil {
-				return t.processTimeout()
+			if err != nil {
+				log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
+				return false
 			}
+			return t.processTimeout()
 		}
-		return false
 	case datapb.CompactionTaskState_completed:
 		t.result = result
-		saveSuccess := t.saveSegmentMeta()
-		if !saveSuccess {
+		if err := t.saveSegmentMeta(); err != nil {
+			log.Warn("l0CompactionTask failed to save segment meta", zap.Error(err))
 			return false
 		}
-		err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved))
-		if err == nil {
-			return t.processMetaSaved()
+
+		if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved)); err != nil {
+			return false
 		}
-		return false
+		return t.processMetaSaved()
	case datapb.CompactionTaskState_failed:
-		err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed))
-		if err != nil {
-			log.Warn("fail to updateAndSaveTaskMeta")
+		if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed)); err != nil {
+			log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
+			return false
 		}
-		return false
+		return t.processFailed()
 	}
 	return false
 }
@@ -244,10 +258,9 @@ func (t *l0CompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, err
 	// Select sealed L1 segments for LevelZero compaction that meets the condition:
 	// dmlPos < triggerInfo.pos
 	sealedSegments := t.meta.SelectSegments(WithCollection(t.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
-		return (t.GetPartitionID() == -1 || info.GetPartitionID() == t.GetPartitionID()) &&
+		return (t.GetPartitionID() == common.AllPartitionsID || info.GetPartitionID() == t.GetPartitionID()) &&
 			info.GetInsertChannel() == plan.GetChannel() &&
 			isFlushState(info.GetState()) &&
-			//!info.isCompacting &&
 			!info.GetIsImporting() &&
 			info.GetLevel() != datapb.SegmentLevel_L0 &&
 			info.GetStartPosition().GetTimestamp() < t.GetPos().GetTimestamp()
@@ -262,8 +275,8 @@ func (t *l0CompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, err
 	for _, segInfo := range sealedSegments {
 		// TODO should allow parallel executing of l0 compaction
 		if segInfo.isCompacting {
-			log.Info("l0 compaction candidate segment is compacting")
-			return nil, merr.WrapErrCompactionPlanConflict("segment is compacting")
+			log.Info("l0 compaction candidate segment is compacting", zap.Int64("segmentID", segInfo.GetID()))
+			return nil, merr.WrapErrCompactionPlanConflict(fmt.Sprintf("segment %d is compacting", segInfo.GetID()))
 		}
 	}
 
@@ -317,14 +330,17 @@ func (t *l0CompactionTask) processTimeout() bool {
 }
 
 func (t *l0CompactionTask) processFailed() bool {
-	if err := t.sessions.DropCompactionPlan(t.GetNodeID(), &datapb.DropCompactionPlanRequest{
-		PlanID: t.GetPlanID(),
-	}); err != nil {
-		log.Warn("l0CompactionTask processFailed unable to drop compaction plan", zap.Int64("planID", t.GetPlanID()), zap.Error(err))
+	if t.GetNodeID() != 0 && t.GetNodeID() != NullNodeID {
+		err := t.sessions.DropCompactionPlan(t.GetNodeID(), &datapb.DropCompactionPlanRequest{
+			PlanID: t.GetPlanID(),
+		})
+		if err != nil {
+			log.Warn("l0CompactionTask processFailed unable to drop compaction plan", zap.Int64("planID", t.GetPlanID()), zap.Error(err))
+		}
 	}
 
 	t.resetSegmentCompacting()
-	log.Info("l0CompactionTask processFailed done", zap.Int64("planID", t.GetPlanID()))
+	log.Info("l0CompactionTask processFailed done", zap.Int64("taskID", t.GetTriggerID()), zap.Int64("planID", t.GetPlanID()))
 	return true
 }
 
@@ -364,7 +380,7 @@ func (t *l0CompactionTask) SaveTaskMeta() error {
 	return t.saveTaskMeta(t.CompactionTask)
 }
 
-func (t *l0CompactionTask) saveSegmentMeta() bool {
+func (t *l0CompactionTask) saveSegmentMeta() error {
 	result := t.result
 	plan := t.GetPlan()
 	var operators []UpdateOperator
@@ -383,10 +399,6 @@ func (t *l0CompactionTask) saveSegmentMeta() bool {
 	log.Info("meta update: update segments info for level zero compaction",
 		zap.Int64("planID", plan.GetPlanID()),
 	)
-	err := t.meta.UpdateSegmentsInfo(operators...)
-	if err != nil {
-		log.Info("Failed to saveSegmentMeta for compaction tasks to DataNode", zap.Error(err))
-		return false
-	}
-	return true
+
+	return t.meta.UpdateSegmentsInfo(operators...)
 }
diff --git a/internal/datacoord/compaction_task_l0_test.go b/internal/datacoord/compaction_task_l0_test.go
index caa118cc84..f337017efd 100644
--- a/internal/datacoord/compaction_task_l0_test.go
+++ b/internal/datacoord/compaction_task_l0_test.go
@@ -1,6 +1,9 @@
 package datacoord
 
 import (
+	"context"
+
+	"github.com/cockroachdb/errors"
 	"github.com/samber/lo"
 	"github.com/stretchr/testify/mock"
 
@@ -121,3 +124,149 @@ func (s *CompactionTaskSuite) TestProcessRefreshPlan_SelectZeroSegmentsL0() {
 	_, err := task.BuildCompactionRequest()
 	s.Error(err)
 }
+
+func generateTestL0Task(state datapb.CompactionTaskState) *l0CompactionTask {
+	return &l0CompactionTask{
+		CompactionTask: &datapb.CompactionTask{
+			PlanID:        1,
+			TriggerID:     19530,
+			CollectionID:  1,
+			PartitionID:   10,
+			Type:          datapb.CompactionType_Level0DeleteCompaction,
+			NodeID:        NullNodeID,
+			State:         state,
+			InputSegments: []int64{100, 101},
+		},
+	}
+}
+
+func (s *CompactionTaskSuite) SetupSubTest() {
+	s.SetupTest()
+}
+
+func (s *CompactionTaskSuite) TestProcessStateTrans() {
+	s.Run("test pipelining needReassignNodeID", func() {
+		t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
+		t.NodeID = NullNodeID
+		got := t.Process()
+		s.False(got)
+		s.Equal(datapb.CompactionTaskState_pipelining, t.State)
+		s.EqualValues(NullNodeID, t.NodeID)
+	})
+
+	s.Run("test pipelining BuildCompactionRequest failed", func() {
+		t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
+		t.NodeID = 100
+		channel := "ch-1"
+		deltaLogs := []*datapb.FieldBinlog{getFieldBinlogIDs(101, 3)}
+
+		t.meta = s.mockMeta
+		s.mockMeta.EXPECT().SelectSegments(mock.Anything, mock.Anything).Return(
+			[]*SegmentInfo{
+				{SegmentInfo: &datapb.SegmentInfo{
+					ID:            200,
+					Level:         datapb.SegmentLevel_L1,
+					InsertChannel: channel,
+				}, isCompacting: true},
+			},
+		)
+
+		s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).RunAndReturn(func(segID int64) *SegmentInfo {
+			return &SegmentInfo{SegmentInfo: &datapb.SegmentInfo{
+				ID:            segID,
+				Level:         datapb.SegmentLevel_L0,
+				InsertChannel: channel,
+				State:         commonpb.SegmentState_Flushed,
+				Deltalogs:     deltaLogs,
+			}}
+		}).Twice()
+		s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything).Return(nil).Once()
+		s.mockMeta.EXPECT().SetSegmentsCompacting(mock.Anything, false).Return()
+
+		t.sessions = s.mockSessMgr
+		s.mockSessMgr.EXPECT().DropCompactionPlan(mock.Anything, mock.Anything).Return(nil).Once()
+
+		got := t.Process()
+		s.True(got)
+		s.Equal(datapb.CompactionTaskState_failed, t.State)
+	})
+
+	s.Run("test pipelining Compaction failed", func() {
+		t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
+		t.NodeID = 100
+		channel := "ch-1"
+		deltaLogs := []*datapb.FieldBinlog{getFieldBinlogIDs(101, 3)}
+
+		t.meta = s.mockMeta
+		s.mockMeta.EXPECT().SelectSegments(mock.Anything, mock.Anything).Return(
+			[]*SegmentInfo{
+				{SegmentInfo: &datapb.SegmentInfo{
+					ID:            200,
+					Level:         datapb.SegmentLevel_L1,
+					InsertChannel: channel,
+				}},
+			},
+		)
+
+		s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).RunAndReturn(func(segID int64) *SegmentInfo {
+			return &SegmentInfo{SegmentInfo: &datapb.SegmentInfo{
+				ID:            segID,
+				Level:         datapb.SegmentLevel_L0,
+				InsertChannel: channel,
+				State:         commonpb.SegmentState_Flushed,
+				Deltalogs:     deltaLogs,
+			}}
+		}).Twice()
+		s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything).Return(nil)
+
+		t.sessions = s.mockSessMgr
+		s.mockSessMgr.EXPECT().Compaction(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, nodeID int64, plan *datapb.CompactionPlan) error {
+			s.Require().EqualValues(t.NodeID, nodeID)
+			return errors.New("mock error")
+		})
+
+		got := t.Process()
+		s.False(got)
+		s.Equal(datapb.CompactionTaskState_pipelining, t.State)
+		s.EqualValues(NullNodeID, t.NodeID)
+	})
+
+	s.Run("test pipelining success", func() {
+		t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
+		t.NodeID = 100
+		channel := "ch-1"
+		deltaLogs := []*datapb.FieldBinlog{getFieldBinlogIDs(101, 3)}
+
+		t.meta = s.mockMeta
+		s.mockMeta.EXPECT().SelectSegments(mock.Anything, mock.Anything).Return(
+			[]*SegmentInfo{
+				{SegmentInfo: &datapb.SegmentInfo{
+					ID:            200,
+					Level:         datapb.SegmentLevel_L1,
+					InsertChannel: channel,
+				}},
+			},
+		)
+
+		s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).RunAndReturn(func(segID int64) *SegmentInfo {
+			return &SegmentInfo{SegmentInfo: &datapb.SegmentInfo{
+				ID:            segID,
+				Level:         datapb.SegmentLevel_L0,
+				InsertChannel: channel,
+				State:         commonpb.SegmentState_Flushed,
+				Deltalogs:     deltaLogs,
+			}}
+		}).Twice()
+		s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything).Return(nil).Once()
+
+		t.sessions = s.mockSessMgr
+		s.mockSessMgr.EXPECT().Compaction(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, nodeID int64, plan *datapb.CompactionPlan) error {
+			s.Require().EqualValues(t.NodeID, nodeID)
+			return nil
+		})
+
+		got := t.Process()
+		s.False(got)
+		s.Equal(datapb.CompactionTaskState_executing, t.State)
+	})
+}
diff --git a/internal/datacoord/compaction_trigger_v2.go b/internal/datacoord/compaction_trigger_v2.go
index 50147a7636..56df7d4fdb 100644
--- a/internal/datacoord/compaction_trigger_v2.go
+++ b/internal/datacoord/compaction_trigger_v2.go
@@ -255,6 +255,7 @@ func (m *CompactionTriggerManager) SubmitL0ViewToScheduler(ctx context.Context,
 		zap.Int64("taskID", taskID),
 		zap.Int64("planID", task.GetPlanID()),
 		zap.String("type", task.GetType().String()),
+		zap.Int64s("L0 segments", levelZeroSegs),
 	)