Improve codes in insertbuffernode (#8054)

Add error handling in insertbuffernode See also: #7624, #7684 Signed-off-by: yangxuan <xuan.yang@zilliz.com>
2024-12-01 03:18:29 +08:00 · 2021-09-17 16:27:56 +08:00 · 2021-09-17 16:27:56 +08:00 · 6c434b4801
commit 6c434b4801
parent 82d4d19337
4 changed files with 131 additions and 64 deletions
--- a/internal/datanode/flow_graph_insert_buffer_node.go
+++ b/internal/datanode/flow_graph_insert_buffer_node.go
@ -23,7 +23,7 @@ import (
 	"unsafe"

 	"github.com/golang/protobuf/proto"
-
+	"github.com/opentracing/opentracing-go"
 	"go.uber.org/zap"

 	"github.com/milvus-io/milvus/internal/kv"
@ -31,9 +31,7 @@ import (
 	"github.com/milvus-io/milvus/internal/log"
 	"github.com/milvus-io/milvus/internal/msgstream"
 	"github.com/milvus-io/milvus/internal/storage"
-	"github.com/milvus-io/milvus/internal/util/flowgraph"
 	"github.com/milvus-io/milvus/internal/util/trace"
-	"github.com/opentracing/opentracing-go"

 	"github.com/milvus-io/milvus/internal/proto/commonpb"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
@ -51,6 +49,7 @@ type (
 	InsertData = storage.InsertData
 	Blob       = storage.Blob
 )
+
 type insertBufferNode struct {
 	BaseNode
 	channelName  string
@ -137,24 +136,29 @@ func (ibNode *insertBufferNode) Name() string {
 	return "ibNode"
 }

-func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
+func (ibNode *insertBufferNode) Close() {
+	if ibNode.timeTickStream != nil {
+		ibNode.timeTickStream.Close()
+	}
+
+	if ibNode.segmentStatisticsStream != nil {
+		ibNode.segmentStatisticsStream.Close()
+	}
+}
+
+func (ibNode *insertBufferNode) Operate(in []Msg) []Msg {

 	// log.Debug("InsertBufferNode Operating")

 	if len(in) != 1 {
 		log.Error("Invalid operate message input in insertBufferNode", zap.Int("input length", len(in)))
-		// TODO: add error handling
+		return []Msg{}
 	}

 	iMsg, ok := in[0].(*insertMsg)
 	if !ok {
 		log.Error("type assertion failed for insertMsg")
-		// TODO: add error handling
-	}
-
-	if iMsg == nil {
-		ibNode.timeTickStream.Close()
-		ibNode.segmentStatisticsStream.Close()
+		ibNode.Close()
 		return []Msg{}
 	}

@ -179,46 +183,21 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 		endPositions = append(endPositions, pos)
 	}

-	// Updating segment statistics
-	uniqueSeg := make(map[UniqueID]int64)
-	for _, msg := range iMsg.insertMessages {
-
-		currentSegID := msg.GetSegmentID()
-		collID := msg.GetCollectionID()
-		partitionID := msg.GetPartitionID()
-
-		if !ibNode.replica.hasSegment(currentSegID, true) {
-			err := ibNode.replica.addNewSegment(currentSegID, collID, partitionID, msg.GetChannelID(),
-				startPositions[0], endPositions[0])
-			if err != nil {
-				log.Error("add segment wrong", zap.Error(err))
-			}
-
-		}
-
-		segNum := uniqueSeg[currentSegID]
-		uniqueSeg[currentSegID] = segNum + int64(len(msg.RowIDs))
+	// Updating segment statistics in replica
+	seg2Upload, err := ibNode.updateSegStatesInReplica(iMsg.insertMessages, startPositions[0], endPositions[0])
+	if err != nil {
+		log.Warn("update segment states in Replica wrong", zap.Error(err))
+		return []Msg{}
 	}

-	segToUpdate := make([]UniqueID, 0, len(uniqueSeg))
-	for id, num := range uniqueSeg {
-		segToUpdate = append(segToUpdate, id)
-
-		err := ibNode.replica.updateStatistics(id, num)
+	if len(seg2Upload) > 0 {
+		err := ibNode.uploadMemStates2Coord(seg2Upload)
 		if err != nil {
-			log.Error("update Segment Row number wrong", zap.Error(err))
+			log.Error("upload segment statistics to coord error", zap.Error(err))
 		}
 	}

-	if len(segToUpdate) > 0 {
-		err := ibNode.updateSegStatistics(segToUpdate)
-		if err != nil {
-			log.Error("update segment statistics error", zap.Error(err))
-		}
-	}
-
-	// iMsg is insertMsg
-	// 1. iMsg -> buffer
+	// insert messages -> buffer
 	for _, msg := range iMsg.insertMessages {
 		err := ibNode.bufferInsertMsg(iMsg, msg)
 		if err != nil {
@ -226,6 +205,7 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 		}
 	}

+	// TODO GOOSE: log updated segments' states
 	if len(iMsg.insertMessages) > 0 {
 		log.Debug("---insert buffer status---")
 		var stopSign int = 0
@ -239,9 +219,10 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 		}
 	}

-	finishCh := make(chan segmentFlushUnit, len(segToUpdate))
+	// Auto Flush
+	finishCh := make(chan segmentFlushUnit, len(seg2Upload))
 	finishCnt := sync.WaitGroup{}
-	for _, segToFlush := range segToUpdate {
+	for _, segToFlush := range seg2Upload {
 		// If full, auto flush
 		if ibNode.insertBuffer.full(segToFlush) {
 			log.Debug(". Insert Buffer full, auto flushing ",
@ -281,7 +262,7 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 		}
 	}

-	// iMsg is Flush() msg from datacoord
+	// Manul Flush
 	select {
 	case fmsg := <-ibNode.flushChan:
 		currentSegID := fmsg.segmentID
@ -362,6 +343,36 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 	return nil
 }

+func (ibNode *insertBufferNode) updateSegStatesInReplica(insertMsgs []*msgstream.InsertMsg, startPos, endPos *internalpb.MsgPosition) (seg2Upload []UniqueID, err error) {
+	uniqueSeg := make(map[UniqueID]int64)
+	for _, msg := range insertMsgs {
+
+		currentSegID := msg.GetSegmentID()
+		collID := msg.GetCollectionID()
+		partitionID := msg.GetPartitionID()
+
+		if !ibNode.replica.hasSegment(currentSegID, true) {
+			err = ibNode.replica.addNewSegment(currentSegID, collID, partitionID, msg.GetChannelID(),
+				startPos, endPos)
+			if err != nil {
+				log.Warn("add segment wrong", zap.Int64("Seg ID", currentSegID), zap.Error(err))
+				return
+			}
+		}
+
+		segNum := uniqueSeg[currentSegID]
+		uniqueSeg[currentSegID] = segNum + int64(len(msg.RowIDs))
+	}
+
+	seg2Upload = make([]UniqueID, 0, len(uniqueSeg))
+	for id, num := range uniqueSeg {
+		seg2Upload = append(seg2Upload, id)
+		ibNode.replica.updateStatistics(id, num)
+	}
+
+	return
+}
+
 // bufferInsertMsg put InsertMsg into buffer
 // 	1.1 fetch related schema from replica
 // 	1.2 Get buffer data and put data into each field buffer
@ -384,7 +395,6 @@ func (ibNode *insertBufferNode) bufferInsertMsg(iMsg *insertMsg, msg *msgstream.
 	// 1.1 Get Collection Schema
 	collSchema, err := ibNode.replica.getCollectionSchema(collectionID, msg.EndTs())
 	if err != nil {
-		// GOOSE TODO add error handler
 		log.Error("Get schema wrong:", zap.Error(err))
 		return err
 	}
@ -414,7 +424,6 @@ func (ibNode *insertBufferNode) bufferInsertMsg(iMsg *insertMsg, msg *msgstream.
 			if dim <= 0 {
 				log.Error("invalid dim")
 				continue
-				// TODO: add error handling
 			}

 			if _, ok := idata.Data[field.FieldID]; !ok {
@ -453,7 +462,7 @@ func (ibNode *insertBufferNode) bufferInsertMsg(iMsg *insertMsg, msg *msgstream.
 			}
 			if dim <= 0 {
 				log.Error("invalid dim")
-				// TODO: add error handling
+				continue
 			}

 			if _, ok := idata.Data[field.FieldID]; !ok {
@ -759,7 +768,7 @@ func (ibNode *insertBufferNode) writeHardTimeTick(ts Timestamp) error {
 	return ibNode.timeTickStream.Produce(&msgPack)
 }

-func (ibNode *insertBufferNode) updateSegStatistics(segIDs []UniqueID) error {
+func (ibNode *insertBufferNode) uploadMemStates2Coord(segIDs []UniqueID) error {
 	log.Debug("Updating segments statistics...")
 	statsUpdates := make([]*internalpb.SegmentStatisticsUpdates, 0, len(segIDs))
 	for _, segID := range segIDs {
--- a/internal/datanode/flow_graph_insert_buffer_node_test.go
+++ b/internal/datanode/flow_graph_insert_buffer_node_test.go
@ -111,7 +111,35 @@ func TestFlowGraphInsertBufferNodeCreate(t *testing.T) {
 	assert.Error(t, err)
 }

+type mockMsg struct{}
+
+func (*mockMsg) TimeTick() Timestamp {
+	return 0
+}
+
 func TestFlowGraphInsertBufferNode_Operate(t *testing.T) {
+	t.Run("Test iBNode Operate invalid Msg", func(te *testing.T) {
+		invalidInTests := []struct {
+			in          []Msg
+			description string
+		}{
+			{[]Msg{},
+				"Invalid input length == 0"},
+			{[]Msg{&insertMsg{}, &insertMsg{}, &insertMsg{}},
+				"Invalid input length == 3"},
+			{[]Msg{&mockMsg{}},
+				"Invalid input length == 1 but input message is not insertMsg"},
+		}
+
+		for _, test := range invalidInTests {
+			te.Run(test.description, func(t0 *testing.T) {
+				ibn := &insertBufferNode{}
+				rt := ibn.Operate(test.in)
+				assert.Empty(t0, rt)
+			})
+		}
+	})
+
 	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
 	defer cancel()

@ -681,3 +709,37 @@ func TestInsertBufferNode_bufferInsertMsg(t *testing.T) {
 		assert.NotNil(t, err)
 	}
 }
+
+func TestInsertBufferNode_updateSegStatesInReplica(te *testing.T) {
+	invalideTests := []struct {
+		replicaCollID UniqueID
+
+		inCollID    UniqueID
+		segID       UniqueID
+		description string
+	}{
+		{1, 9, 100, "collectionID mismatch"},
+	}
+
+	for _, test := range invalideTests {
+		ibNode := &insertBufferNode{
+			replica: newReplica(&RootCoordFactory{}, test.replicaCollID),
+		}
+
+		im := []*msgstream.InsertMsg{
+			{
+				InsertRequest: internalpb.InsertRequest{
+					CollectionID: test.inCollID,
+					SegmentID:    test.segID,
+				},
+			},
+		}
+
+		seg, err := ibNode.updateSegStatesInReplica(im, &internalpb.MsgPosition{}, &internalpb.MsgPosition{})
+
+		assert.Error(te, err)
+		assert.Empty(te, seg)
+
+	}
+
+}
--- a/internal/datanode/segment_replica.go
+++ b/internal/datanode/segment_replica.go
@ -49,7 +49,7 @@ type Replica interface {
 	updateSegmentPKRange(segID UniqueID, rowIDs []int64)
 	hasSegment(segID UniqueID, countFlushed bool) bool

-	updateStatistics(segID UniqueID, numRows int64) error
+	updateStatistics(segID UniqueID, numRows int64)
 	getSegmentStatisticsUpdates(segID UniqueID) (*internalpb.SegmentStatisticsUpdates, error)
 	segmentFlushed(segID UniqueID)
 }
@ -362,7 +362,7 @@ func (replica *SegmentReplica) hasSegment(segID UniqueID, countFlushed bool) boo
 }

 // updateStatistics updates the number of rows of a segment in replica.
-func (replica *SegmentReplica) updateStatistics(segID UniqueID, numRows int64) error {
+func (replica *SegmentReplica) updateStatistics(segID UniqueID, numRows int64) {
 	replica.segMu.Lock()
 	defer replica.segMu.Unlock()

@ -370,16 +370,16 @@ func (replica *SegmentReplica) updateStatistics(segID UniqueID, numRows int64) e
 	if seg, ok := replica.newSegments[segID]; ok {
 		seg.memorySize = 0
 		seg.numRows += numRows
-		return nil
+		return
 	}

 	if seg, ok := replica.normalSegments[segID]; ok {
 		seg.memorySize = 0
 		seg.numRows += numRows
-		return nil
+		return
 	}

-	return fmt.Errorf("There's no segment %v", segID)
+	log.Warn("update segment num row not exist", zap.Int64("segID", segID))
 }

 // getSegmentStatisticsUpdates gives current segment's statistics updates.
--- a/internal/datanode/segment_replica_test.go
+++ b/internal/datanode/segment_replica_test.go
@ -416,9 +416,8 @@ func TestSegmentReplica_InterfaceMethod(te *testing.T) {
 					sr.flushedSegments[test.flushedSegID] = &Segment{}
 				}

-				err := sr.updateStatistics(test.inSegID, test.inNumRows)
+				sr.updateStatistics(test.inSegID, test.inNumRows)
 				if test.isvalidCase {
-					assert.NoError(t, err)

 					updates, err := sr.getSegmentStatisticsUpdates(test.inSegID)
 					assert.NoError(t, err)
@ -427,7 +426,6 @@ func TestSegmentReplica_InterfaceMethod(te *testing.T) {

 					sr.updateSegmentCheckPoint(10000)
 				} else {
-					assert.Error(t, err)
 					updates, err := sr.getSegmentStatisticsUpdates(test.inSegID)
 					assert.Error(t, err)
 					assert.Nil(t, updates)
@ -501,8 +499,7 @@ func TestSegmentReplica_InterfaceMethod(te *testing.T) {
 		assert.True(t, seg.isNew.Load().(bool))
 		assert.False(t, seg.isFlushed.Load().(bool))

-		err = replica.updateStatistics(0, 10)
-		assert.NoError(t, err)
+		replica.updateStatistics(0, 10)
 		assert.Equal(t, int64(10), seg.numRows)

 		cpPos := &internalpb.MsgPosition{ChannelName: "insert-01", Timestamp: Timestamp(10)}
@ -527,8 +524,7 @@ func TestSegmentReplica_InterfaceMethod(te *testing.T) {
 		err = replica.addNormalSegment(1, 100000, 2, "invalid", int64(0), &segmentCheckPoint{})
 		assert.Error(t, err)

-		err = replica.updateStatistics(1, 10)
-		assert.NoError(t, err)
+		replica.updateStatistics(1, 10)
 		assert.Equal(t, int64(20), seg.numRows)

 		segPos := replica.listNewSegmentsStartPositions()