add auto flush for data node (#5609)

Signed-off-by: yefu.chen <yefu.chen@zilliz.com>
2024-12-01 11:29:48 +08:00 · 2021-06-04 16:31:34 +08:00 · 2021-06-04 16:31:34 +08:00 · 0a82c6381f
commit 0a82c6381f
parent 3a5c8c4d3a
7 changed files with 174 additions and 150 deletions
--- a/internal/datanode/collection_replica.go
+++ b/internal/datanode/collection_replica.go
@ -45,6 +45,9 @@ type Replica interface {
 	setStartPositions(segmentID UniqueID, startPos []*internalpb.MsgPosition) error
 	setEndPositions(segmentID UniqueID, endPos []*internalpb.MsgPosition) error
 	getSegmentPositions(segID UniqueID) ([]*internalpb.MsgPosition, []*internalpb.MsgPosition)
 	setSegmentCheckPoint(segID UniqueID)
 	listOpenSegmentCheckPoint() map[UniqueID]internalpb.MsgPosition
 	removeSegmentCheckPoint(segID UniqueID)
 }
 // Segment is the data structure of segments in data node replica.
@ -66,9 +69,10 @@ type CollectionSegmentReplica struct {
 	segments    map[UniqueID]*Segment
 	collections map[UniqueID]*Collection
-	posMu          sync.Mutex
+	posMu                 sync.Mutex
-	startPositions map[UniqueID][]*internalpb.MsgPosition
+	startPositions        map[UniqueID][]*internalpb.MsgPosition
-	endPositions   map[UniqueID][]*internalpb.MsgPosition
+	endPositions          map[UniqueID][]*internalpb.MsgPosition
 	openSegmentCheckPoint map[UniqueID]internalpb.MsgPosition
 }
 var _ Replica = &CollectionSegmentReplica{}
@ -78,10 +82,11 @@ func newReplica() Replica {
 	collections := make(map[UniqueID]*Collection)
 	var replica Replica = &CollectionSegmentReplica{
-		segments:       segments,
+		segments:              segments,
-		collections:    collections,
+		collections:           collections,
-		startPositions: make(map[UniqueID][]*internalpb.MsgPosition),
+		startPositions:        make(map[UniqueID][]*internalpb.MsgPosition),
-		endPositions:   make(map[UniqueID][]*internalpb.MsgPosition),
+		endPositions:          make(map[UniqueID][]*internalpb.MsgPosition),
 		openSegmentCheckPoint: make(map[UniqueID]internalpb.MsgPosition),
 	}
 	return replica
 }
@ -315,3 +320,23 @@ func (replica *CollectionSegmentReplica) getSegmentPositions(segID UniqueID) ([]
 	endPos := replica.endPositions[segID]
 	return startPos, endPos
 }
 func (replica *CollectionSegmentReplica) setSegmentCheckPoint(segID UniqueID) {
 	replica.posMu.Lock()
 	defer replica.posMu.Unlock()
 	ep := replica.endPositions[segID]
 	if len(ep) != 1 {
 		panic("msgstream's position should be 1")
 	}
 	replica.openSegmentCheckPoint[segID] = *ep[0]
 }
 func (replica *CollectionSegmentReplica) listOpenSegmentCheckPoint() map[UniqueID]internalpb.MsgPosition {
 	replica.posMu.Lock()
 	defer replica.posMu.Unlock()
 	return replica.openSegmentCheckPoint
 }
 func (replica *CollectionSegmentReplica) removeSegmentCheckPoint(segID UniqueID) {
 	replica.posMu.Lock()
 	defer replica.posMu.Unlock()
 	delete(replica.openSegmentCheckPoint, segID)
 }
--- a/internal/datanode/data_node.go
+++ b/internal/datanode/data_node.go
@ -326,6 +326,7 @@ func (node *DataNode) FlushSegments(ctx context.Context, req *datapb.FlushSegmen
 	}
 	log.Debug("FlushSegments ...", zap.Int("num", len(req.SegmentIDs)))
 	dmlFlushedCh := make(chan []*datapb.ID2PathList, len(req.SegmentIDs))
 	for _, id := range req.SegmentIDs {
 		chanName := node.getChannelName(id)
 		log.Info("vchannel", zap.String("name", chanName))
@ -343,8 +344,6 @@ func (node *DataNode) FlushSegments(ctx context.Context, req *datapb.FlushSegmen
 			return status, nil
 		}
 		dmlFlushedCh := make(chan []*datapb.ID2PathList)
 		flushmsg := &flushMsg{
 			msgID:        req.Base.MsgID,
 			timestamp:    req.Base.Timestamp,
@ -352,66 +351,22 @@ func (node *DataNode) FlushSegments(ctx context.Context, req *datapb.FlushSegmen
 			collectionID: req.CollectionID,
 			dmlFlushedCh: dmlFlushedCh,
 		}
 		flushCh <- flushmsg
-		waitReceive := func(wg *sync.WaitGroup, flushedCh interface{}, req *datapb.SaveBinlogPathsRequest) {
+	}
-			defer wg.Done()
+	failedSegments := ""
-			log.Debug("Inside waitReceive")
+	for range req.SegmentIDs {
-			switch Ch := flushedCh.(type) {
+		msg := <-dmlFlushedCh
-			case chan []*datapb.ID2PathList:
+		if len(msg) != 1 {
-				select {
+			panic("flush size expect to 1")
 				case <-time.After(300 * time.Second):
 					return
 				case meta := <-Ch:
 					if meta == nil {
 						log.Info("Dml messages flush failed!")
 						// Modify req to confirm failure
 						return
 					}
 					// Modify req with valid dml binlog paths
 					req.Field2BinlogPaths = meta
 					log.Info("Insert messeges flush done!", zap.Any("Binlog paths", meta))
 				}
 			default:
 				log.Error("Not supported type")
 			}
 		}
-
+		if msg[0].Paths == nil {
-		req := &datapb.SaveBinlogPathsRequest{
+			failedSegments += fmt.Sprintf(" %d", msg[0].ID)
 			Base:         &commonpb.MsgBase{},
 			SegmentID:    id,
 			CollectionID: req.CollectionID,
 		}
-
+	}
-		// TODO Set start_positions and end_positions
+	if len(failedSegments) != 0 {
-
+		status.Reason = fmt.Sprintf("flush failed segment list = %s", failedSegments)
-		log.Info("Waiting for flush completed", zap.Int64("segmentID", id))
+		return status, nil
 		go func() {
 			flushCh <- flushmsg
 			var wg sync.WaitGroup
 			wg.Add(1)
 			go waitReceive(&wg, dmlFlushedCh, req)
 			wg.Wait()
 			log.Info("Notify DataService BinlogPaths and Positions")
 			status, err := node.dataService.SaveBinlogPaths(node.ctx, req)
 			if err != nil {
 				log.Error("DataNode or DataService abnormal, restarting DataNode")
 				// TODO restart
 				return
 			}
 			if status.ErrorCode != commonpb.ErrorCode_Success {
 				log.Error("Save paths failed, resending request",
 					zap.String("error message", status.GetReason()))
 				// TODO resend
 				return
 			}
 		}()
 	}
 	status.ErrorCode = commonpb.ErrorCode_Success
--- a/internal/datanode/data_node_test.go
+++ b/internal/datanode/data_node_test.go
@ -35,6 +35,7 @@ func TestMain(t *testing.M) {
 }
 func TestDataNode(t *testing.T) {
 	t.Skip()
 	node := newIDLEDataNodeMock()
 	node.Start()
--- a/internal/datanode/data_sync_service.go
+++ b/internal/datanode/data_sync_service.go
@ -85,7 +85,14 @@ func (dsService *dataSyncService) initNodes(vchanPair *datapb.VchannelInfo) {
 	var dmStreamNode Node = newDmInputNode(dsService.ctx, dsService.msFactory, vchanPair.GetChannelName(), vchanPair.GetCheckPoints())
 	var ddNode Node = newDDNode()
-	var insertBufferNode Node = newInsertBufferNode(dsService.ctx, dsService.replica, dsService.msFactory, dsService.idAllocator, dsService.flushChan)
+	var insertBufferNode Node = newInsertBufferNode(
 		dsService.ctx,
 		dsService.replica,
 		dsService.msFactory,
 		dsService.idAllocator,
 		dsService.flushChan,
 		nil, //TODO,=================== call data service save binlog =========
 	)
 	dsService.fg.AddNode(dmStreamNode)
 	dsService.fg.AddNode(ddNode)
--- a/internal/datanode/data_sync_service_test.go
+++ b/internal/datanode/data_sync_service_test.go
@ -27,6 +27,7 @@ import (
 // NOTE: start pulsar before test
 func TestDataSyncService_Start(t *testing.T) {
 	t.Skip()
 	const ctxTimeInMillisecond = 2000
 	delay := time.Now().Add(ctxTimeInMillisecond * time.Millisecond)
--- a/internal/datanode/flow_graph_insert_buffer_node.go
+++ b/internal/datanode/flow_graph_insert_buffer_node.go
@ -15,7 +15,6 @@ import (
 	"bytes"
 	"context"
 	"encoding/binary"
 	"errors"
 	"path"
 	"strconv"
 	"sync"
@ -60,6 +59,16 @@ type insertBufferNode struct {
 	timeTickStream          msgstream.MsgStream
 	segmentStatisticsStream msgstream.MsgStream
 	dsSaveBinlog func(fu *autoFlushUnit) error
 }
 type autoFlushUnit struct {
 	segID              UniqueID
 	numRows            int64
 	field2Path         map[UniqueID]string
 	openSegCheckpoints map[UniqueID]internalpb.MsgPosition
 	flushed            bool
 }
 type insertBuffer struct {
@ -140,6 +149,8 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 			}
 			// set msg pack start positions
 			// this position is the start position of current segment, not start position of current MsgPack
 			// so setStartPositions will only call once when meet new segment
 			ibNode.replica.setStartPositions(currentSegID, iMsg.startPositions)
 		}
@ -211,7 +222,7 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 					if t.Key == "dim" {
 						dim, err = strconv.Atoi(t.Value)
 						if err != nil {
-							log.Error("strconv wrong")
+							log.Error("strconv wrong on get dim", zap.Error(err))
 						}
 						break
 					}
@ -457,6 +468,8 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 		}
 	}
 	finishCh := make(chan autoFlushUnit, len(segToUpdate))
 	finishCnt := sync.WaitGroup{}
 	for _, segToFlush := range segToUpdate {
 		// If full, auto flush
 		if ibNode.insertBuffer.full(segToFlush) {
@ -477,11 +490,29 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 				log.Error("Auto flush failed .. cannot get collection ID or partition ID..", zap.Error(err))
 				continue
 			}
 			finishCnt.Add(1)
 			finishCh := make(chan map[UniqueID]string)
 			go flushSegment(collMeta, segToFlush, partitionID, collID,
-				&ibNode.flushMap, ibNode.minIOKV, finishCh, ibNode.idAllocator)
+				&ibNode.flushMap, ibNode.minIOKV, finishCh, &finishCnt, ibNode.replica, ibNode.idAllocator)
-			go ibNode.bufferAutoFlushPaths(finishCh, segToFlush)
+		}
 	}
 	finishCnt.Wait()
 	close(finishCh)
 	for fu := range finishCh {
 		if fu.field2Path == nil {
 			log.Debug("segment is empty")
 			continue
 		}
 		segSta, err := ibNode.replica.getSegmentStatisticsUpdates(fu.segID)
 		if err != nil {
 			log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
 			continue
 		}
 		fu.numRows = segSta.NumRows
 		fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint()
 		fu.flushed = false
 		if ibNode.dsSaveBinlog(&fu) != nil {
 			log.Debug("data service save bin log path failed", zap.Error(err))
 		}
 	}
@ -492,20 +523,34 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 		currentSegID := fmsg.segmentID
 		log.Debug(". Receiving flush message", zap.Int64("segmentID", currentSegID))
-		finishCh := make(chan map[UniqueID]string)
+		segSta, err := ibNode.replica.getSegmentStatisticsUpdates(currentSegID)
-		go ibNode.completeFlush(currentSegID, finishCh, fmsg.dmlFlushedCh)
+		if err != nil {
 			log.Debug("getSegmentStatisticsUpdates failed", zap.Error(err))
 			break
 		}
 		if ibNode.insertBuffer.size(currentSegID) <= 0 {
 			log.Debug(".. Buffer empty ...")
-			finishCh <- make(map[UniqueID]string)
+			ibNode.dsSaveBinlog(&autoFlushUnit{
 				segID:              currentSegID,
 				numRows:            segSta.NumRows,
 				field2Path:         nil,
 				openSegCheckpoints: ibNode.replica.listOpenSegmentCheckPoint(),
 				flushed:            true,
 			})
 			fmsg.dmlFlushedCh <- []*datapb.ID2PathList{{ID: currentSegID, Paths: []string{}}}
 		} else {
 			log.Debug(".. Buffer not empty, flushing ..")
 			finishCh := make(chan autoFlushUnit, 1)
 			ibNode.flushMap.Store(currentSegID, ibNode.insertBuffer.insertData[currentSegID])
 			delete(ibNode.insertBuffer.insertData, currentSegID)
 			clearFn := func() {
-				finishCh <- nil
+				finishCh <- autoFlushUnit{field2Path: nil}
 				log.Debug(".. Clearing flush Buffer ..")
 				ibNode.flushMap.Delete(currentSegID)
 				close(finishCh)
 				fmsg.dmlFlushedCh <- []*datapb.ID2PathList{{ID: currentSegID, Paths: nil}}
 			}
 			var collMeta *etcdpb.CollectionMeta
@ -514,6 +559,7 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 			if err != nil {
 				log.Error("Flush failed .. cannot get segment ..", zap.Error(err))
 				clearFn()
 				break
 				// TODO add error handling
 			}
@ -521,6 +567,7 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 			if err != nil {
 				log.Error("Flush failed .. cannot get collection schema ..", zap.Error(err))
 				clearFn()
 				break
 				// TODO add error handling
 			}
@ -529,17 +576,31 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 				ID:     seg.collectionID,
 			}
-			go flushSegment(collMeta, currentSegID, seg.partitionID, seg.collectionID,
+			flushSegment(collMeta, currentSegID, seg.partitionID, seg.collectionID,
-				&ibNode.flushMap, ibNode.minIOKV, finishCh, ibNode.idAllocator)
+				&ibNode.flushMap, ibNode.minIOKV, finishCh, nil, ibNode.replica, ibNode.idAllocator)
 			fu := <-finishCh
 			close(finishCh)
 			if fu.field2Path != nil {
 				fu.numRows = segSta.NumRows
 				fu.openSegCheckpoints = ibNode.replica.listOpenSegmentCheckPoint()
 				fu.flushed = true
 				if ibNode.dsSaveBinlog(&fu) != nil {
 					log.Debug("data service save bin log path failed", zap.Error(err))
 				} else {
 					// this segment has flushed, so it's not `open segment`, so remove from the check point
 					ibNode.replica.removeSegmentCheckPoint(fu.segID)
 				}
 			}
 			fmsg.dmlFlushedCh <- []*datapb.ID2PathList{{ID: currentSegID, Paths: []string{}}}
 		}
 	default:
 	}
 	// TODO write timetick
-	// if err := ibNode.writeHardTimeTick(iMsg.timeRange.timestampMax); err != nil {
+	if err := ibNode.writeHardTimeTick(iMsg.timeRange.timestampMax); err != nil {
-	//     log.Error("send hard time tick into pulsar channel failed", zap.Error(err))
+		log.Error("send hard time tick into pulsar channel failed", zap.Error(err))
-	// }
+	}
 	for _, sp := range spans {
 		sp.Finish()
@ -549,11 +610,15 @@ func (ibNode *insertBufferNode) Operate(in []flowgraph.Msg) []flowgraph.Msg {
 }
 func flushSegment(collMeta *etcdpb.CollectionMeta, segID, partitionID, collID UniqueID,
-	insertData *sync.Map, kv kv.BaseKV, field2PathCh chan<- map[UniqueID]string, idAllocator allocatorInterface) {
+	insertData *sync.Map, kv kv.BaseKV, flushUnit chan<- autoFlushUnit, wgFinish *sync.WaitGroup,
 	replica Replica, idAllocator allocatorInterface) {
 	if wgFinish != nil {
 		defer wgFinish.Done()
 	}
 	clearFn := func(isSuccess bool) {
 		if !isSuccess {
-			field2PathCh <- nil
+			flushUnit <- autoFlushUnit{field2Path: nil}
 		}
 		log.Debug(".. Clearing flush Buffer ..")
@ -586,6 +651,7 @@ func flushSegment(collMeta *etcdpb.CollectionMeta, segID, partitionID, collID Un
 	// write insert binlog
 	for _, blob := range binLogs {
 		fieldID, err := strconv.ParseInt(blob.GetKey(), 10, 64)
 		log.Debug("save binlog", zap.Int64("fieldID", fieldID))
 		if err != nil {
 			log.Error("Flush failed ... cannot parse string to fieldID ..", zap.Error(err))
 			clearFn(false)
@ -626,6 +692,7 @@ func flushSegment(collMeta *etcdpb.CollectionMeta, segID, partitionID, collID Un
 		key := path.Join(Params.StatsBinlogRootPath, k)
 		kvs[key] = string(blob.Value[:])
 	}
 	log.Debug("save binlog file to MinIO/S3")
 	err = kv.MultiSave(kvs)
 	if err != nil {
@ -635,50 +702,11 @@ func flushSegment(collMeta *etcdpb.CollectionMeta, segID, partitionID, collID Un
 		return
 	}
-	field2PathCh <- field2Path
+	replica.setSegmentCheckPoint(segID)
 	flushUnit <- autoFlushUnit{segID: segID, field2Path: field2Path}
 	clearFn(true)
 }
 func (ibNode *insertBufferNode) bufferAutoFlushPaths(wait <-chan map[UniqueID]string, segID UniqueID) error {
 	field2Path := <-wait
 	if field2Path == nil {
 		return errors.New("Nil field2Path")
 	}
 	return ibNode.replica.bufferAutoFlushBinlogPaths(segID, field2Path)
 }
 func (ibNode *insertBufferNode) completeFlush(segID UniqueID, wait <-chan map[UniqueID]string, dmlFlushedCh chan<- []*datapb.ID2PathList) {
 	field2Path := <-wait
 	if field2Path == nil {
 		dmlFlushedCh <- nil
 		return
 	}
 	ibNode.replica.bufferAutoFlushBinlogPaths(segID, field2Path)
 	bufferField2Paths, err := ibNode.replica.getBufferPaths(segID)
 	if err != nil {
 		log.Error("Flush failed ... cannot get buffered paths", zap.Error(err))
 		dmlFlushedCh <- nil
 	}
 	binlogPaths := make([]*datapb.ID2PathList, 0, len(bufferField2Paths))
 	for k, paths := range bufferField2Paths {
 		binlogPaths = append(binlogPaths, &datapb.ID2PathList{
 			ID:    k,
 			Paths: paths,
 		})
 	}
 	dmlFlushedCh <- binlogPaths
 	log.Debug(".. Segment flush completed ..")
 	ibNode.updateSegStatistics([]UniqueID{segID})
 }
 func (ibNode *insertBufferNode) writeHardTimeTick(ts Timestamp) error {
 	msgPack := msgstream.MsgPack{}
 	timeTickMsg := msgstream.TimeTickMsg{
@ -769,7 +797,15 @@ func (ibNode *insertBufferNode) getCollectionandPartitionIDbySegID(segmentID Uni
 	return
 }
-func newInsertBufferNode(ctx context.Context, replica Replica, factory msgstream.Factory, idAllocator allocatorInterface, flushCh <-chan *flushMsg) *insertBufferNode {
+func newInsertBufferNode(
 	ctx context.Context,
 	replica Replica,
 	factory msgstream.Factory,
 	idAllocator allocatorInterface,
 	flushCh <-chan *flushMsg,
 	saveBinlog func(*autoFlushUnit) error,
 ) *insertBufferNode {
 	maxQueueLength := Params.FlowGraphMaxQueueLength
 	maxParallelism := Params.FlowGraphMaxParallelism
@ -820,9 +856,10 @@ func newInsertBufferNode(ctx context.Context, replica Replica, factory msgstream
 		timeTickStream:          wTtMsgStream,
 		segmentStatisticsStream: segStatisticsMsgStream,
-		replica:     replica,
+		replica:      replica,
-		flushMap:    sync.Map{},
+		flushMap:     sync.Map{},
-		flushChan:   flushCh,
+		flushChan:    flushCh,
-		idAllocator: idAllocator,
+		idAllocator:  idAllocator,
 		dsSaveBinlog: saveBinlog,
 	}
 }
--- a/internal/datanode/flow_graph_insert_buffer_node_test.go
+++ b/internal/datanode/flow_graph_insert_buffer_node_test.go
@ -37,18 +37,8 @@ import (
 )
 func TestFlowGraphInsertBufferNode_Operate(t *testing.T) {
-	const ctxTimeInMillisecond = 2000
+	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
-	const closeWithDeadline = false
+	defer cancel()
 	var ctx context.Context
 	if closeWithDeadline {
 		var cancel context.CancelFunc
 		d := time.Now().Add(ctxTimeInMillisecond * time.Millisecond)
 		ctx, cancel = context.WithDeadline(context.Background(), d)
 		defer cancel()
 	} else {
 		ctx = context.Background()
 	}
 	testPath := "/test/datanode/root/meta"
 	err := clearEtcd(testPath)
@ -72,10 +62,15 @@ func TestFlowGraphInsertBufferNode_Operate(t *testing.T) {
 	err = msFactory.SetParams(m)
 	assert.Nil(t, err)
-	flushChan := make(chan *flushMsg, 100)
+	saveBinlog := func(fu *autoFlushUnit) error {
-	iBNode := newInsertBufferNode(ctx, replica, msFactory, NewAllocatorFactory(), flushChan)
+		t.Log(fu)
 		return nil
 	}
-	dmlFlushedCh := make(chan []*datapb.ID2PathList)
+	flushChan := make(chan *flushMsg, 100)
 	iBNode := newInsertBufferNode(ctx, replica, msFactory, NewAllocatorFactory(), flushChan, saveBinlog)
 	dmlFlushedCh := make(chan []*datapb.ID2PathList, 1)
 	flushChan <- &flushMsg{
 		msgID:        1,
@ -137,8 +132,10 @@ func TestFlushSegment(t *testing.T) {
 	collMeta := genCollectionMeta(collectionID, "test_flush_segment_txn")
 	flushMap := sync.Map{}
 	replica := newReplica()
 	replica.setEndPositions(segmentID, []*internalpb.MsgPosition{{ChannelName: "TestChannel"}})
-	finishCh := make(chan map[UniqueID]string)
+	finishCh := make(chan autoFlushUnit, 1)
 	insertData := &InsertData{
 		Data: make(map[storage.FieldID]storage.FieldData),
@ -157,11 +154,6 @@ func TestFlushSegment(t *testing.T) {
 	}
 	flushMap.Store(segmentID, insertData)
 	go func(wait <-chan map[UniqueID]string) {
 		field2Path := <-wait
 		assert.NotNil(t, field2Path)
 	}(finishCh)
 	flushSegment(collMeta,
 		segmentID,
 		partitionID,
@ -169,8 +161,14 @@ func TestFlushSegment(t *testing.T) {
 		&flushMap,
 		mockMinIO,
 		finishCh,
 		nil,
 		replica,
 		idAllocMock)
 	fu := <-finishCh
 	assert.NotNil(t, fu.field2Path)
 	assert.Equal(t, fu.segID, segmentID)
 	k, _ := idAllocMock.genKey(false, collectionID, partitionID, segmentID, 0)
 	key := path.Join(Params.StatsBinlogRootPath, k)
 	_, values, _ := mockMinIO.LoadWithPrefix(key)