mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-02 11:59:00 +08:00
fix: Compaction task l0 state transfer wrong (#34597)
This bug caused failed L0 compaction tasks to never end. See also: #34460 --------- Signed-off-by: yangxuan <xuan.yang@zilliz.com>
This commit is contained in:
parent
104d0966b7
commit
d7966f46ad
@ -28,6 +28,7 @@ import (
|
|||||||
|
|
||||||
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
||||||
"github.com/milvus-io/milvus/internal/proto/datapb"
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
||||||
|
"github.com/milvus-io/milvus/pkg/common"
|
||||||
"github.com/milvus-io/milvus/pkg/log"
|
"github.com/milvus-io/milvus/pkg/log"
|
||||||
"github.com/milvus-io/milvus/pkg/util/merr"
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
||||||
)
|
)
|
||||||
@ -65,56 +66,69 @@ func (t *l0CompactionTask) processPipelining() bool {
|
|||||||
if t.NeedReAssignNodeID() {
|
if t.NeedReAssignNodeID() {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log := log.With(zap.Int64("triggerID", t.GetTriggerID()), zap.Int64("nodeID", t.GetNodeID()))
|
||||||
var err error
|
var err error
|
||||||
t.plan, err = t.BuildCompactionRequest()
|
t.plan, err = t.BuildCompactionRequest()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
err2 := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed), setFailReason(err.Error()))
|
log.Warn("l0CompactionTask failed to build compaction request", zap.Error(err))
|
||||||
return err2 == nil
|
err = t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed), setFailReason(err.Error()))
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return t.processFailed()
|
||||||
}
|
}
|
||||||
err = t.sessions.Compaction(context.Background(), t.GetNodeID(), t.GetPlan())
|
|
||||||
|
err = t.sessions.Compaction(context.TODO(), t.GetNodeID(), t.GetPlan())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn("Failed to notify compaction tasks to DataNode", zap.Error(err))
|
log.Warn("l0CompactionTask failed to notify compaction tasks to DataNode", zap.Int64("planID", t.GetPlanID()), zap.Error(err))
|
||||||
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
|
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_executing))
|
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_executing))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *l0CompactionTask) processExecuting() bool {
|
func (t *l0CompactionTask) processExecuting() bool {
|
||||||
|
log := log.With(zap.Int64("planID", t.GetPlanID()), zap.Int64("nodeID", t.GetNodeID()))
|
||||||
result, err := t.sessions.GetCompactionPlanResult(t.GetNodeID(), t.GetPlanID())
|
result, err := t.sessions.GetCompactionPlanResult(t.GetNodeID(), t.GetPlanID())
|
||||||
if err != nil || result == nil {
|
if err != nil || result == nil {
|
||||||
if errors.Is(err, merr.ErrNodeNotFound) {
|
if errors.Is(err, merr.ErrNodeNotFound) {
|
||||||
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
|
t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_pipelining), setNodeID(NullNodeID))
|
||||||
}
|
}
|
||||||
|
log.Warn("l0CompactionTask failed to get compaction result", zap.Error(err))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
switch result.GetState() {
|
switch result.GetState() {
|
||||||
case datapb.CompactionTaskState_executing:
|
case datapb.CompactionTaskState_executing:
|
||||||
if t.checkTimeout() {
|
if t.checkTimeout() {
|
||||||
err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_timeout))
|
err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_timeout))
|
||||||
if err == nil {
|
if err != nil {
|
||||||
return t.processTimeout()
|
log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
return t.processTimeout()
|
||||||
}
|
}
|
||||||
return false
|
|
||||||
case datapb.CompactionTaskState_completed:
|
case datapb.CompactionTaskState_completed:
|
||||||
t.result = result
|
t.result = result
|
||||||
saveSuccess := t.saveSegmentMeta()
|
if err := t.saveSegmentMeta(); err != nil {
|
||||||
if !saveSuccess {
|
log.Warn("l0CompactionTask failed to save segment meta", zap.Error(err))
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved))
|
|
||||||
if err == nil {
|
if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_meta_saved)); err != nil {
|
||||||
return t.processMetaSaved()
|
return false
|
||||||
}
|
}
|
||||||
return false
|
return t.processMetaSaved()
|
||||||
case datapb.CompactionTaskState_failed:
|
case datapb.CompactionTaskState_failed:
|
||||||
err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed))
|
if err := t.updateAndSaveTaskMeta(setState(datapb.CompactionTaskState_failed)); err != nil {
|
||||||
if err != nil {
|
log.Warn("l0CompactionTask failed to updateAndSaveTaskMeta", zap.Error(err))
|
||||||
log.Warn("fail to updateAndSaveTaskMeta")
|
return false
|
||||||
}
|
}
|
||||||
return false
|
return t.processFailed()
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -244,10 +258,9 @@ func (t *l0CompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, err
|
|||||||
// Select sealed L1 segments for LevelZero compaction that meets the condition:
|
// Select sealed L1 segments for LevelZero compaction that meets the condition:
|
||||||
// dmlPos < triggerInfo.pos
|
// dmlPos < triggerInfo.pos
|
||||||
sealedSegments := t.meta.SelectSegments(WithCollection(t.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
|
sealedSegments := t.meta.SelectSegments(WithCollection(t.GetCollectionID()), SegmentFilterFunc(func(info *SegmentInfo) bool {
|
||||||
return (t.GetPartitionID() == -1 || info.GetPartitionID() == t.GetPartitionID()) &&
|
return (t.GetPartitionID() == common.AllPartitionsID || info.GetPartitionID() == t.GetPartitionID()) &&
|
||||||
info.GetInsertChannel() == plan.GetChannel() &&
|
info.GetInsertChannel() == plan.GetChannel() &&
|
||||||
isFlushState(info.GetState()) &&
|
isFlushState(info.GetState()) &&
|
||||||
//!info.isCompacting &&
|
|
||||||
!info.GetIsImporting() &&
|
!info.GetIsImporting() &&
|
||||||
info.GetLevel() != datapb.SegmentLevel_L0 &&
|
info.GetLevel() != datapb.SegmentLevel_L0 &&
|
||||||
info.GetStartPosition().GetTimestamp() < t.GetPos().GetTimestamp()
|
info.GetStartPosition().GetTimestamp() < t.GetPos().GetTimestamp()
|
||||||
@ -262,8 +275,8 @@ func (t *l0CompactionTask) BuildCompactionRequest() (*datapb.CompactionPlan, err
|
|||||||
for _, segInfo := range sealedSegments {
|
for _, segInfo := range sealedSegments {
|
||||||
// TODO should allow parallel executing of l0 compaction
|
// TODO should allow parallel executing of l0 compaction
|
||||||
if segInfo.isCompacting {
|
if segInfo.isCompacting {
|
||||||
log.Info("l0 compaction candidate segment is compacting")
|
log.Info("l0 compaction candidate segment is compacting", zap.Int64("segmentID", segInfo.GetID()))
|
||||||
return nil, merr.WrapErrCompactionPlanConflict("segment is compacting")
|
return nil, merr.WrapErrCompactionPlanConflict(fmt.Sprintf("segment %d is compacting", segInfo.GetID()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -317,14 +330,17 @@ func (t *l0CompactionTask) processTimeout() bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (t *l0CompactionTask) processFailed() bool {
|
func (t *l0CompactionTask) processFailed() bool {
|
||||||
if err := t.sessions.DropCompactionPlan(t.GetNodeID(), &datapb.DropCompactionPlanRequest{
|
if t.GetNodeID() != 0 && t.GetNodeID() != NullNodeID {
|
||||||
PlanID: t.GetPlanID(),
|
err := t.sessions.DropCompactionPlan(t.GetNodeID(), &datapb.DropCompactionPlanRequest{
|
||||||
}); err != nil {
|
PlanID: t.GetPlanID(),
|
||||||
log.Warn("l0CompactionTask processFailed unable to drop compaction plan", zap.Int64("planID", t.GetPlanID()), zap.Error(err))
|
})
|
||||||
|
if err != nil {
|
||||||
|
log.Warn("l0CompactionTask processFailed unable to drop compaction plan", zap.Int64("planID", t.GetPlanID()), zap.Error(err))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
t.resetSegmentCompacting()
|
t.resetSegmentCompacting()
|
||||||
log.Info("l0CompactionTask processFailed done", zap.Int64("planID", t.GetPlanID()))
|
log.Info("l0CompactionTask processFailed done", zap.Int64("taskID", t.GetTriggerID()), zap.Int64("planID", t.GetPlanID()))
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -364,7 +380,7 @@ func (t *l0CompactionTask) SaveTaskMeta() error {
|
|||||||
return t.saveTaskMeta(t.CompactionTask)
|
return t.saveTaskMeta(t.CompactionTask)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *l0CompactionTask) saveSegmentMeta() bool {
|
func (t *l0CompactionTask) saveSegmentMeta() error {
|
||||||
result := t.result
|
result := t.result
|
||||||
plan := t.GetPlan()
|
plan := t.GetPlan()
|
||||||
var operators []UpdateOperator
|
var operators []UpdateOperator
|
||||||
@ -383,10 +399,6 @@ func (t *l0CompactionTask) saveSegmentMeta() bool {
|
|||||||
log.Info("meta update: update segments info for level zero compaction",
|
log.Info("meta update: update segments info for level zero compaction",
|
||||||
zap.Int64("planID", plan.GetPlanID()),
|
zap.Int64("planID", plan.GetPlanID()),
|
||||||
)
|
)
|
||||||
err := t.meta.UpdateSegmentsInfo(operators...)
|
|
||||||
if err != nil {
|
return t.meta.UpdateSegmentsInfo(operators...)
|
||||||
log.Info("Failed to saveSegmentMeta for compaction tasks to DataNode", zap.Error(err))
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,9 @@
|
|||||||
package datacoord
|
package datacoord
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"context"
|
||||||
|
|
||||||
|
"github.com/cockroachdb/errors"
|
||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
"github.com/stretchr/testify/mock"
|
"github.com/stretchr/testify/mock"
|
||||||
|
|
||||||
@ -121,3 +124,149 @@ func (s *CompactionTaskSuite) TestProcessRefreshPlan_SelectZeroSegmentsL0() {
|
|||||||
_, err := task.BuildCompactionRequest()
|
_, err := task.BuildCompactionRequest()
|
||||||
s.Error(err)
|
s.Error(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func generateTestL0Task(state datapb.CompactionTaskState) *l0CompactionTask {
|
||||||
|
return &l0CompactionTask{
|
||||||
|
CompactionTask: &datapb.CompactionTask{
|
||||||
|
PlanID: 1,
|
||||||
|
TriggerID: 19530,
|
||||||
|
CollectionID: 1,
|
||||||
|
PartitionID: 10,
|
||||||
|
Type: datapb.CompactionType_Level0DeleteCompaction,
|
||||||
|
NodeID: NullNodeID,
|
||||||
|
State: state,
|
||||||
|
InputSegments: []int64{100, 101},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CompactionTaskSuite) SetupSubTest() {
|
||||||
|
s.SetupTest()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *CompactionTaskSuite) TestProcessStateTrans() {
|
||||||
|
s.Run("test pipelining needReassignNodeID", func() {
|
||||||
|
t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
|
||||||
|
t.NodeID = NullNodeID
|
||||||
|
got := t.Process()
|
||||||
|
s.False(got)
|
||||||
|
s.Equal(datapb.CompactionTaskState_pipelining, t.State)
|
||||||
|
s.EqualValues(NullNodeID, t.NodeID)
|
||||||
|
})
|
||||||
|
|
||||||
|
s.Run("test pipelining BuildCompactionRequest failed", func() {
|
||||||
|
t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
|
||||||
|
t.NodeID = 100
|
||||||
|
channel := "ch-1"
|
||||||
|
deltaLogs := []*datapb.FieldBinlog{getFieldBinlogIDs(101, 3)}
|
||||||
|
|
||||||
|
t.meta = s.mockMeta
|
||||||
|
s.mockMeta.EXPECT().SelectSegments(mock.Anything, mock.Anything).Return(
|
||||||
|
[]*SegmentInfo{
|
||||||
|
{SegmentInfo: &datapb.SegmentInfo{
|
||||||
|
ID: 200,
|
||||||
|
Level: datapb.SegmentLevel_L1,
|
||||||
|
InsertChannel: channel,
|
||||||
|
}, isCompacting: true},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).RunAndReturn(func(segID int64) *SegmentInfo {
|
||||||
|
return &SegmentInfo{SegmentInfo: &datapb.SegmentInfo{
|
||||||
|
ID: segID,
|
||||||
|
Level: datapb.SegmentLevel_L0,
|
||||||
|
InsertChannel: channel,
|
||||||
|
State: commonpb.SegmentState_Flushed,
|
||||||
|
Deltalogs: deltaLogs,
|
||||||
|
}}
|
||||||
|
}).Twice()
|
||||||
|
s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything).Return(nil).Once()
|
||||||
|
s.mockMeta.EXPECT().SetSegmentsCompacting(mock.Anything, false).Return()
|
||||||
|
|
||||||
|
t.sessions = s.mockSessMgr
|
||||||
|
s.mockSessMgr.EXPECT().DropCompactionPlan(mock.Anything, mock.Anything).Return(nil).Once()
|
||||||
|
|
||||||
|
got := t.Process()
|
||||||
|
s.True(got)
|
||||||
|
s.Equal(datapb.CompactionTaskState_failed, t.State)
|
||||||
|
})
|
||||||
|
|
||||||
|
s.Run("test pipelining Compaction failed", func() {
|
||||||
|
t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
|
||||||
|
t.NodeID = 100
|
||||||
|
channel := "ch-1"
|
||||||
|
deltaLogs := []*datapb.FieldBinlog{getFieldBinlogIDs(101, 3)}
|
||||||
|
|
||||||
|
t.meta = s.mockMeta
|
||||||
|
s.mockMeta.EXPECT().SelectSegments(mock.Anything, mock.Anything).Return(
|
||||||
|
[]*SegmentInfo{
|
||||||
|
{SegmentInfo: &datapb.SegmentInfo{
|
||||||
|
ID: 200,
|
||||||
|
Level: datapb.SegmentLevel_L1,
|
||||||
|
InsertChannel: channel,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).RunAndReturn(func(segID int64) *SegmentInfo {
|
||||||
|
return &SegmentInfo{SegmentInfo: &datapb.SegmentInfo{
|
||||||
|
ID: segID,
|
||||||
|
Level: datapb.SegmentLevel_L0,
|
||||||
|
InsertChannel: channel,
|
||||||
|
State: commonpb.SegmentState_Flushed,
|
||||||
|
Deltalogs: deltaLogs,
|
||||||
|
}}
|
||||||
|
}).Twice()
|
||||||
|
s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything).Return(nil)
|
||||||
|
|
||||||
|
t.sessions = s.mockSessMgr
|
||||||
|
s.mockSessMgr.EXPECT().Compaction(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, nodeID int64, plan *datapb.CompactionPlan) error {
|
||||||
|
s.Require().EqualValues(t.NodeID, nodeID)
|
||||||
|
return errors.New("mock error")
|
||||||
|
})
|
||||||
|
|
||||||
|
got := t.Process()
|
||||||
|
s.False(got)
|
||||||
|
s.Equal(datapb.CompactionTaskState_pipelining, t.State)
|
||||||
|
s.EqualValues(NullNodeID, t.NodeID)
|
||||||
|
})
|
||||||
|
|
||||||
|
s.Run("test pipelining success", func() {
|
||||||
|
t := generateTestL0Task(datapb.CompactionTaskState_pipelining)
|
||||||
|
t.NodeID = 100
|
||||||
|
channel := "ch-1"
|
||||||
|
deltaLogs := []*datapb.FieldBinlog{getFieldBinlogIDs(101, 3)}
|
||||||
|
|
||||||
|
t.meta = s.mockMeta
|
||||||
|
s.mockMeta.EXPECT().SelectSegments(mock.Anything, mock.Anything).Return(
|
||||||
|
[]*SegmentInfo{
|
||||||
|
{SegmentInfo: &datapb.SegmentInfo{
|
||||||
|
ID: 200,
|
||||||
|
Level: datapb.SegmentLevel_L1,
|
||||||
|
InsertChannel: channel,
|
||||||
|
}},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
s.mockMeta.EXPECT().GetHealthySegment(mock.Anything).RunAndReturn(func(segID int64) *SegmentInfo {
|
||||||
|
return &SegmentInfo{SegmentInfo: &datapb.SegmentInfo{
|
||||||
|
ID: segID,
|
||||||
|
Level: datapb.SegmentLevel_L0,
|
||||||
|
InsertChannel: channel,
|
||||||
|
State: commonpb.SegmentState_Flushed,
|
||||||
|
Deltalogs: deltaLogs,
|
||||||
|
}}
|
||||||
|
}).Twice()
|
||||||
|
s.mockMeta.EXPECT().SaveCompactionTask(mock.Anything).Return(nil).Once()
|
||||||
|
|
||||||
|
t.sessions = s.mockSessMgr
|
||||||
|
s.mockSessMgr.EXPECT().Compaction(mock.Anything, mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, nodeID int64, plan *datapb.CompactionPlan) error {
|
||||||
|
s.Require().EqualValues(t.NodeID, nodeID)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
|
||||||
|
got := t.Process()
|
||||||
|
s.False(got)
|
||||||
|
s.Equal(datapb.CompactionTaskState_executing, t.State)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
@ -255,6 +255,7 @@ func (m *CompactionTriggerManager) SubmitL0ViewToScheduler(ctx context.Context,
|
|||||||
zap.Int64("taskID", taskID),
|
zap.Int64("taskID", taskID),
|
||||||
zap.Int64("planID", task.GetPlanID()),
|
zap.Int64("planID", task.GetPlanID()),
|
||||||
zap.String("type", task.GetType().String()),
|
zap.String("type", task.GetType().String()),
|
||||||
|
zap.Int64s("L0 segments", levelZeroSegs),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user