SyncReplicaSegments syncs all segments (#17774)

Signed-off-by: yah01 <yang.cen@zilliz.com>
Authored by yah01 on 2022-06-24 23:24:15 +08:00, committed by GitHub
parent 16c3aedc15
commit 8388478ef3
3 changed files with 278 additions and 257 deletions
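
Summary: syncReplicaSegments now takes meta, a replica ID, and an optional shard list, and syncs all segments of the collection recorded in meta for the replica's shards, instead of only the segments carried by a trigger task's child tasks; the call sites move out of updateTaskProcess and into the tasks' globalPostExecute. A hedged sketch of the call-shape change, using only names that appear in the diff below:

// Before: driven by the trigger task's child tasks.
//     err := syncReplicaSegments(ctx, cluster, childTasks)
//
// After: driven by meta, per replica; an empty shard list means "sync every shard".
//     err := syncReplicaSegments(ctx, meta, cluster, replicaID)
//     err := syncReplicaSegments(ctx, meta, cluster, replicaID, shards...)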

Changed file 1 of 3:

@@ -356,17 +356,7 @@ func (lct *loadCollectionTask) updateTaskProcess() {
 		}
 	}
 	if allDone {
-		err := syncReplicaSegments(lct.ctx, lct.cluster, childTasks)
-		if err != nil {
-			log.Error("loadCollectionTask: failed to sync replica segments to shard leader",
-				zap.Int64("taskID", lct.getTaskID()),
-				zap.Int64("collectionID", collectionID),
-				zap.Error(err))
-			lct.setResultInfo(err)
-			return
-		}
-		err = lct.meta.setLoadPercentage(collectionID, 0, 100, querypb.LoadType_LoadCollection)
+		err := lct.meta.setLoadPercentage(collectionID, 0, 100, querypb.LoadType_LoadCollection)
 		if err != nil {
 			log.Error("loadCollectionTask: set load percentage to meta's collectionInfo", zap.Int64("collectionID", collectionID))
 			lct.setResultInfo(err)
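
With the hunk above, load-collection progress tracking no longer syncs shard leaders from updateTaskProcess; the sync now happens once per replica in the new globalPostExecute added below, which the scheduler invokes after the trigger task completes (see the scheduler hunks further down).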
@@ -609,6 +599,32 @@ func (lct *loadCollectionTask) postExecute(ctx context.Context) error {
 	return nil
 }
 
+func (lct *loadCollectionTask) globalPostExecute(ctx context.Context) error {
+	collection, err := lct.meta.getCollectionInfoByID(lct.CollectionID)
+	if err != nil {
+		log.Error("loadCollectionTask: failed to get collection info from meta",
+			zap.Int64("taskID", lct.getTaskID()),
+			zap.Int64("collectionID", lct.CollectionID),
+			zap.Error(err))
+		return err
+	}
+
+	for _, replica := range collection.ReplicaIds {
+		err := syncReplicaSegments(lct.ctx, lct.meta, lct.cluster, replica)
+		if err != nil {
+			log.Error("loadCollectionTask: failed to sync replica segments to shard leader",
+				zap.Int64("taskID", lct.getTaskID()),
+				zap.Int64("collectionID", lct.CollectionID),
+				zap.Error(err))
+			return err
+		}
+	}
+
+	return nil
+}
+
 func (lct *loadCollectionTask) rollBack(ctx context.Context) []task {
 	onlineNodeIDs := lct.cluster.OnlineNodeIDs()
 	resultTasks := make([]task, 0)
@@ -804,16 +820,6 @@ func (lpt *loadPartitionTask) updateTaskProcess() {
 		}
 	}
 	if allDone {
-		err := syncReplicaSegments(lpt.ctx, lpt.cluster, childTasks)
-		if err != nil {
-			log.Error("loadPartitionTask: failed to sync replica segments to shard leader",
-				zap.Int64("taskID", lpt.getTaskID()),
-				zap.Int64("collectionID", collectionID),
-				zap.Error(err))
-			lpt.setResultInfo(err)
-			return
-		}
 		for _, id := range partitionIDs {
 			err := lpt.meta.setLoadPercentage(collectionID, id, 100, querypb.LoadType_LoadPartition)
 			if err != nil {
@@ -1049,6 +1055,34 @@ func (lpt *loadPartitionTask) postExecute(ctx context.Context) error {
 	return nil
 }
 
+func (lpt *loadPartitionTask) globalPostExecute(ctx context.Context) error {
+	collectionID := lpt.CollectionID
+
+	collection, err := lpt.meta.getCollectionInfoByID(collectionID)
+	if err != nil {
+		log.Error("loadPartitionTask: failed to get collection info from meta",
+			zap.Int64("taskID", lpt.getTaskID()),
+			zap.Int64("collectionID", collectionID),
+			zap.Error(err))
+		return err
+	}
+
+	for _, replica := range collection.ReplicaIds {
+		err := syncReplicaSegments(lpt.ctx, lpt.meta, lpt.cluster, replica)
+		if err != nil {
+			log.Error("loadPartitionTask: failed to sync replica segments to shard leader",
+				zap.Int64("taskID", lpt.getTaskID()),
+				zap.Int64("collectionID", collectionID),
+				zap.Error(err))
+			return err
+		}
+	}
+
+	return nil
+}
+
 func (lpt *loadPartitionTask) rollBack(ctx context.Context) []task {
 	collectionID := lpt.CollectionID
 	resultTasks := make([]task, 0)
@@ -2264,7 +2298,10 @@ func (lbt *loadBalanceTask) postExecute(context.Context) error {
 }
 
 func (lbt *loadBalanceTask) globalPostExecute(ctx context.Context) error {
-	if len(lbt.getChildTask()) > 0 {
+	if lbt.BalanceReason != querypb.TriggerCondition_NodeDown {
+		return nil
+	}
+
 	replicas := make(map[UniqueID]*milvuspb.ReplicaInfo)
 	segments := make(map[UniqueID]*querypb.SegmentInfo)
 	dmChannels := make(map[string]*querypb.DmChannelWatchInfo)
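
Note the guard change above: loadBalanceTask.globalPostExecute used to run its body whenever the task had child tasks; it now returns early unless the balance was triggered by a node going down, so the replica cleanup and shard-leader sync below only run for NodeDown balances.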
@@ -2297,7 +2334,6 @@ func (lbt *loadBalanceTask) globalPostExecute(ctx context.Context) error {
 	)
 
 	wg := errgroup.Group{}
-	if lbt.triggerCondition == querypb.TriggerCondition_NodeDown {
 	// Remove offline nodes from replica
 	for replicaID := range replicas {
 		replicaID := replicaID
@@ -2330,39 +2366,8 @@ func (lbt *loadBalanceTask) globalPostExecute(ctx context.Context) error {
 				return nil
 			})
 		}
-	}
-
-	// Remove offline nodes from segment
-	// for _, segment := range segments {
-	// 	segment := segment
-	// 	wg.Go(func() error {
-	// 		segment.NodeID = -1
-	// 		segment.NodeIds = removeFromSlice(segment.NodeIds, lbt.SourceNodeIDs...)
-	// 		err := lbt.meta.saveSegmentInfo(segment)
-	// 		if err != nil {
-	// 			log.Error("failed to remove offline nodes from segment info",
-	// 				zap.Int64("segmentID", segment.SegmentID),
-	// 				zap.Error(err))
-	// 			return err
-	// 		}
-	// 		log.Info("remove offline nodes from segment",
-	// 			zap.Int64("taskID", lbt.getTaskID()),
-	// 			zap.Int64("segmentID", segment.GetSegmentID()),
-	// 			zap.Int64s("nodeIds", segment.GetNodeIds()))
-	// 		return nil
-	// 	})
-	// }
-
-	// Wait for the previous goroutines,
-	// which conflicts with the code below due to modifing replicas
-	err := wg.Wait()
-	if err != nil {
-		return err
-	}
-
+	// Update shard leaders for replicas
 	for _, childTask := range lbt.getChildTask() {
 		if task, ok := childTask.(*watchDmChannelTask); ok {
 			wg.Go(func() error {
@@ -2401,21 +2406,28 @@ func (lbt *loadBalanceTask) globalPostExecute(ctx context.Context) error {
 			})
 		}
 	}
-	err = wg.Wait()
+	err := wg.Wait()
 	if err != nil {
 		return err
 	}
 
-	err = syncReplicaSegments(ctx, lbt.cluster, lbt.getChildTask())
+	for replicaID := range replicas {
+		shards := make([]string, 0, len(dmChannels))
+		for _, dmc := range dmChannels {
+			shards = append(shards, dmc.DmChannel)
+		}
+
+		err := syncReplicaSegments(lbt.ctx, lbt.meta, lbt.cluster, replicaID, shards...)
 		if err != nil {
+			log.Error("loadBalanceTask: failed to sync segments distribution",
+				zap.Int64("collectionID", lbt.CollectionID),
+				zap.Int64("replicaID", lbt.replicaID),
+				zap.Error(err))
 			return err
 		}
 	}
+
+	// if loadBalanceTask execute failed after query node down, the lbt.getResultInfo().ErrorCode will be set to commonpb.ErrorCode_UnexpectedError
+	// then the queryCoord will panic, and the nodeInfo should not be removed immediately
+	// after queryCoord recovery, the balanceTask will redo
+	if lbt.BalanceReason == querypb.TriggerCondition_NodeDown {
 		for _, offlineNodeID := range lbt.SourceNodeIDs {
 			err := lbt.cluster.RemoveNodeInfo(offlineNodeID)
 			if err != nil {
@@ -2426,7 +2438,6 @@ func (lbt *loadBalanceTask) globalPostExecute(ctx context.Context) error {
 				return err
 			}
 		}
+	}
 
 	return nil
 }
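
For context on the errgroup pattern used in globalPostExecute above, here is a minimal, self-contained sketch under simplified assumptions (syncOne and the replica and shard values are illustrative stand-ins, not queryCoord types): fan the per-replica meta updates out, wait exactly once, then push the distribution per replica.

package main

import (
	"context"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// syncOne stands in for syncReplicaSegments; it only prints what it would sync.
func syncOne(ctx context.Context, replicaID int64, shards ...string) error {
	fmt.Println("sync replica", replicaID, "shards", shards)
	return nil
}

func main() {
	ctx := context.Background()
	replicas := []int64{1, 2}
	shards := []string{"dml-ch-0", "dml-ch-1"}

	// Fan out the per-replica meta updates and wait exactly once,
	// mirroring the single wg.Wait() in the new globalPostExecute.
	wg := errgroup.Group{}
	for _, replicaID := range replicas {
		replicaID := replicaID // capture the loop variable for the closure
		wg.Go(func() error {
			fmt.Println("update meta for replica", replicaID)
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		panic(err)
	}

	// Only after the meta updates settle, sync each replica's shard leaders.
	for _, replicaID := range replicas {
		if err := syncOne(ctx, replicaID, shards...); err != nil {
			panic(err)
		}
	}
}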

Changed file 2 of 3:

@@ -660,12 +660,26 @@ func (scheduler *TaskScheduler) scheduleLoop() {
 					processInternalTaskFn(derivedInternalTasks, triggerTask)
 				}
 			}
 		}
 
+		// triggerTask may be LoadCollection, LoadPartitions, LoadBalance, Handoff
+		if triggerTask.getResultInfo().ErrorCode == commonpb.ErrorCode_Success || triggerTask.getTriggerCondition() == querypb.TriggerCondition_NodeDown {
+			err = updateSegmentInfoFromTask(scheduler.ctx, triggerTask, scheduler.meta)
+			if err != nil {
+				triggerTask.setResultInfo(err)
+			}
+		}
+
+		if triggerTask.getResultInfo().ErrorCode == commonpb.ErrorCode_Success {
+			err = triggerTask.globalPostExecute(triggerTask.traceCtx())
+			if err != nil {
+				log.Error("scheduleLoop: failed to execute globalPostExecute() of task",
+					zap.Int64("taskID", triggerTask.getTaskID()),
+					zap.Error(err))
+				triggerTask.setResultInfo(err)
+			}
+		}
+
-		//TODO::xige-16, judging the triggerCondition is ugly, the taskScheduler will be refactored soon
-		// if query node down, the loaded segment and watched dmChannel by the node should be balance to new querynode
-		// if triggerCondition == NodeDown, loadSegment and watchDmchannel request will keep reschedule until the success
-		// the node info has been deleted after assgining child task to triggerTask
-		// so it is necessary to update the meta of segment and dmchannel, or some data may be lost in meta
 		resultInfo := triggerTask.getResultInfo()
 		if resultInfo.ErrorCode != commonpb.ErrorCode_Success {
 			if !alreadyNotify {
@@ -682,25 +696,6 @@ func (scheduler *TaskScheduler) scheduleLoop() {
 				// if childTask still execute failed, then reProduce rollBacked tasks
 				processInternalTaskFn(rollBackTasks, triggerTask)
 			}
 		}
 
-		if triggerTask.getResultInfo().ErrorCode == commonpb.ErrorCode_Success {
-			err = triggerTask.globalPostExecute(triggerTask.traceCtx())
-			if err != nil {
-				log.Error("scheduleLoop: failed to execute globalPostExecute() of task",
-					zap.Int64("taskID", triggerTask.getTaskID()),
-					zap.Error(err))
-				triggerTask.setResultInfo(err)
-			}
-		}
-
-		// triggerTask may be LoadCollection, LoadPartitions, LoadBalance, Handoff
-		if triggerTask.getResultInfo().ErrorCode == commonpb.ErrorCode_Success || triggerTask.getTriggerCondition() == querypb.TriggerCondition_NodeDown {
-			err = updateSegmentInfoFromTask(scheduler.ctx, triggerTask, scheduler.meta)
-			if err != nil {
-				triggerTask.setResultInfo(err)
-			}
-		}
-
 		err = removeTaskFromKVFn(triggerTask)
 		if err != nil {
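
Read together, the two scheduler hunks reorder the post-processing: updateSegmentInfoFromTask now runs first (for successful tasks and for anything triggered by NodeDown), globalPostExecute runs next for successful tasks only, and the failure notification and rollback path comes after both, followed by removeTaskFromKVFn as before. Previously the two calls sat after the rollback block and in the opposite order.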

Changed file 3 of 3:

@@ -18,12 +18,15 @@ package querycoord
 import (
 	"context"
+	"sort"
 
+	"github.com/milvus-io/milvus/internal/log"
 	"github.com/milvus-io/milvus/internal/proto/commonpb"
 	"github.com/milvus-io/milvus/internal/proto/datapb"
 	"github.com/milvus-io/milvus/internal/proto/milvuspb"
 	"github.com/milvus-io/milvus/internal/proto/querypb"
 	"github.com/milvus-io/milvus/internal/util/typeutil"
+	"go.uber.org/zap"
 )
 
 func getCompareMapFromSlice(sliceData []int64) map[int64]struct{} {
@@ -106,95 +109,107 @@ func getDstNodeIDByTask(t task) int64 {
 	return nodeID
 }
 
-func syncReplicaSegments(ctx context.Context, cluster Cluster, childTasks []task) error {
-	type SegmentIndex struct {
-		NodeID      UniqueID
-		PartitionID UniqueID
-		ReplicaID   UniqueID
-	}
-
-	type ShardLeader struct {
-		ReplicaID UniqueID
-		LeaderID  UniqueID
-	}
-
-	shardSegments := make(map[string]map[SegmentIndex]typeutil.UniqueSet) // DMC -> set[Segment]
-	shardLeaders := make(map[string][]*ShardLeader)                       // DMC -> leader
-	for _, childTask := range childTasks {
-		switch task := childTask.(type) {
-		case *loadSegmentTask:
-			nodeID := getDstNodeIDByTask(task)
-			for _, segment := range task.Infos {
-				segments, ok := shardSegments[segment.InsertChannel]
-				if !ok {
-					segments = make(map[SegmentIndex]typeutil.UniqueSet)
-				}
-
-				index := SegmentIndex{
-					NodeID:      nodeID,
-					PartitionID: segment.PartitionID,
-					ReplicaID:   task.ReplicaID,
-				}
-				_, ok = segments[index]
-				if !ok {
-					segments[index] = make(typeutil.UniqueSet)
-				}
-				segments[index].Insert(segment.SegmentID)
-
-				shardSegments[segment.InsertChannel] = segments
-			}
-
-		case *watchDmChannelTask:
-			leaderID := getDstNodeIDByTask(task)
-			leader := &ShardLeader{
-				ReplicaID: task.ReplicaID,
-				LeaderID:  leaderID,
-			}
-			for _, dmc := range task.Infos {
-				leaders, ok := shardLeaders[dmc.ChannelName]
-				if !ok {
-					leaders = make([]*ShardLeader, 0)
-				}
-				leaders = append(leaders, leader)
-				shardLeaders[dmc.ChannelName] = leaders
-			}
-		}
-	}
-
-	for dmc, leaders := range shardLeaders {
-		// invoke sync segments even no segment
-		segments := shardSegments[dmc]
-		for _, leader := range leaders {
-			req := querypb.SyncReplicaSegmentsRequest{
-				VchannelName:    dmc,
-				ReplicaSegments: make([]*querypb.ReplicaSegmentsInfo, 0, len(segments)),
-			}
-			for index, segmentSet := range segments {
-				if index.ReplicaID == leader.ReplicaID {
-					req.ReplicaSegments = append(req.ReplicaSegments,
-						&querypb.ReplicaSegmentsInfo{
-							NodeId:      index.NodeID,
-							PartitionId: index.PartitionID,
-							SegmentIds:  segmentSet.Collect(),
-						})
-				}
-			}
-			err := cluster.SyncReplicaSegments(ctx, leader.LeaderID, &req)
-			if err != nil {
-				return err
-			}
-		}
-	}
-
-	return nil
-}
+// syncReplicaSegments syncs the segments distribution of replica to shard leaders
+// only syncs the segments in shards if not nil
+func syncReplicaSegments(ctx context.Context, meta Meta, cluster Cluster, replicaID UniqueID, shards ...string) error {
+	replica, err := meta.getReplicaByID(replicaID)
+	if err != nil {
+		return err
+	}
+
+	collectionSegments := make(map[UniqueID]*querypb.SegmentInfo)
+	for _, segment := range meta.showSegmentInfos(replica.CollectionID, nil) {
+		collectionSegments[segment.SegmentID] = segment
+	}
+
+	shardSegments := make(map[string][]*querypb.SegmentInfo) // DMC -> []SegmentInfo
+	for _, segment := range collectionSegments {
+		// Group segments by shard
+		segments, ok := shardSegments[segment.DmChannel]
+		if !ok {
+			segments = make([]*querypb.SegmentInfo, 0)
+		}
+		segments = append(segments, segment)
+		shardSegments[segment.DmChannel] = segments
+	}
+
+	for _, shard := range replica.ShardReplicas {
+		if len(shards) > 0 && !isInShards(shard.DmChannelName, shards) {
+			continue
+		}
+
+		segments := shardSegments[shard.DmChannelName]
+		req := querypb.SyncReplicaSegmentsRequest{
+			VchannelName:    shard.DmChannelName,
+			ReplicaSegments: make([]*querypb.ReplicaSegmentsInfo, 0, len(segments)),
+		}
+
+		sort.Slice(segments, func(i, j int) bool {
+			inode := getNodeInReplica(replica, segments[i].NodeIds)
+			jnode := getNodeInReplica(replica, segments[j].NodeIds)
+
+			return inode < jnode ||
+				inode == jnode && segments[i].PartitionID < segments[j].PartitionID
+		})
+
+		for i, j := 0, 0; i < len(segments); i = j {
+			node := getNodeInReplica(replica, segments[i].NodeIds)
+			partition := segments[i].PartitionID
+
+			j++
+			for j < len(segments) &&
+				getNodeInReplica(replica, segments[j].NodeIds) == node &&
+				segments[j].PartitionID == partition {
+				j++
+			}
+
+			segmentIds := make([]UniqueID, 0, len(segments[i:j]))
+			for _, segment := range segments[i:j] {
+				segmentIds = append(segmentIds, segment.SegmentID)
+			}
+
+			req.ReplicaSegments = append(req.ReplicaSegments, &querypb.ReplicaSegmentsInfo{
+				NodeId:      node,
+				PartitionId: partition,
+				SegmentIds:  segmentIds,
+			})
+		}
+
+		log.Debug("sync replica segments",
+			zap.Int64("replicaID", replicaID),
+			zap.Int64("leaderID", shard.LeaderID),
+			zap.Any("req", req))
+
+		err := cluster.SyncReplicaSegments(ctx, shard.LeaderID, &req)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+func isInShards(shard string, shards []string) bool {
+	for _, item := range shards {
+		if shard == item {
+			return true
+		}
+	}
+
+	return false
+}
+
+// getNodeInReplica gets the node which is in the replica
+func getNodeInReplica(replica *milvuspb.ReplicaInfo, nodes []UniqueID) UniqueID {
+	for _, node := range nodes {
+		if nodeIncluded(node, replica.NodeIds) {
+			return node
+		}
+	}
+
+	return 0
+}
+
 func removeFromSlice(origin []UniqueID, del ...UniqueID) []UniqueID {
 	set := make(typeutil.UniqueSet, len(origin))
 	set.Insert(origin...)
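
The heart of the new syncReplicaSegments is the sort-then-sweep pass that turns a flat segment list into one ReplicaSegmentsInfo per (node, partition) pair. Below is a self-contained sketch of just that pass, under simplified assumptions: segInfo and group are illustrative stand-ins for the querypb messages, and the NodeID field plays the role of getNodeInReplica(replica, segment.NodeIds). The sort makes equal (node, partition) pairs adjacent, so the sweep can emit each group in a single pass, matching the i, j loop in the diff.

package main

import (
	"fmt"
	"sort"
)

// segInfo is a stand-in for *querypb.SegmentInfo, reduced to the fields the grouping pass reads.
type segInfo struct {
	SegmentID   int64
	PartitionID int64
	NodeID      int64 // the segment's node that belongs to the replica
}

// group is a stand-in for *querypb.ReplicaSegmentsInfo.
type group struct {
	NodeID      int64
	PartitionID int64
	SegmentIDs  []int64
}

func groupSegments(segments []segInfo) []group {
	// Sort by (node, partition) so equal pairs become adjacent.
	sort.Slice(segments, func(i, j int) bool {
		return segments[i].NodeID < segments[j].NodeID ||
			segments[i].NodeID == segments[j].NodeID &&
				segments[i].PartitionID < segments[j].PartitionID
	})

	groups := make([]group, 0)
	// Two-pointer sweep: [i, j) is a run with the same (node, partition).
	for i, j := 0, 0; i < len(segments); i = j {
		node, partition := segments[i].NodeID, segments[i].PartitionID
		j++
		for j < len(segments) &&
			segments[j].NodeID == node &&
			segments[j].PartitionID == partition {
			j++
		}

		ids := make([]int64, 0, j-i)
		for _, s := range segments[i:j] {
			ids = append(ids, s.SegmentID)
		}
		groups = append(groups, group{NodeID: node, PartitionID: partition, SegmentIDs: ids})
	}
	return groups
}

func main() {
	segments := []segInfo{
		{SegmentID: 4, PartitionID: 10, NodeID: 2},
		{SegmentID: 1, PartitionID: 10, NodeID: 1},
		{SegmentID: 3, PartitionID: 11, NodeID: 1},
		{SegmentID: 2, PartitionID: 10, NodeID: 1},
	}
	for _, g := range groupSegments(segments) {
		fmt.Printf("node=%d partition=%d segments=%v\n", g.NodeID, g.PartitionID, g.SegmentIDs)
	}
	// Expected groups (order of IDs within a group may vary):
	// node=1 partition=10 segments=[1 2]
	// node=1 partition=11 segments=[3]
	// node=2 partition=10 segments=[4]
}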