// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package balance

import (
	"context"
	"fmt"
	"time"

	"go.uber.org/zap"

	"github.com/milvus-io/milvus/internal/proto/querypb"
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	"github.com/milvus-io/milvus/internal/querycoordv2/task"
	"github.com/milvus-io/milvus/pkg/log"
)
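
// PlanInfoPrefix and DistInfoPrefix tag the balance-plan and distribution log
// lines produced below so that they are easy to locate in QueryCoord logs.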
const (
	PlanInfoPrefix = "Balance-Plans:"
	DistInfoPrefix = "Balance-Dists:"
)
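
// CreateSegmentTasksFromPlans converts segment assignment plans into executable
// segment tasks. For each plan it emits a grow action on the target node (p.To)
// and/or a reduce action on the source node (p.From). A plan that carries both
// a source and a target becomes a move task and runs at low priority, while a
// pure load plan runs at normal priority.
//
// A minimal usage sketch; the plan source and scheduler below are assumptions
// for illustration, not part of this file:
//
//	// plans computed elsewhere by a Balance implementation
//	tasks := CreateSegmentTasksFromPlans(ctx, source, timeout, plans)
//	for _, t := range tasks {
//		scheduler.Add(t) // hand the tasks to the QueryCoord task scheduler
//	}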
func CreateSegmentTasksFromPlans(ctx context.Context, source task.Source, timeout time.Duration, plans []SegmentAssignPlan) []task.Task {
	ret := make([]task.Task, 0)
	for _, p := range plans {
		actions := make([]task.Action, 0)
		if p.To != -1 {
			action := task.NewSegmentActionWithScope(p.To, task.ActionTypeGrow, p.Segment.GetInsertChannel(), p.Segment.GetID(), querypb.DataScope_Historical)
			actions = append(actions, action)
		}
		if p.From != -1 {
			action := task.NewSegmentActionWithScope(p.From, task.ActionTypeReduce, p.Segment.GetInsertChannel(), p.Segment.GetID(), querypb.DataScope_Historical)
			actions = append(actions, action)
		}
		t, err := task.NewSegmentTask(
			ctx,
			timeout,
			source,
			p.Segment.GetCollectionID(),
			p.Replica,
			actions...,
		)
		if err != nil {
			log.Warn("create segment task from plan failed",
				zap.Int64("collection", p.Segment.GetCollectionID()),
				zap.Int64("segmentID", p.Segment.GetID()),
				zap.Int64("replica", p.Replica.GetID()),
				zap.String("channel", p.Segment.GetInsertChannel()),
				zap.Int64("from", p.From),
				zap.Int64("to", p.To),
				zap.Error(err),
			)
			continue
		}

		log.Info("create segment task",
			zap.Int64("collection", p.Segment.GetCollectionID()),
			zap.Int64("segmentID", p.Segment.GetID()),
			zap.Int64("replica", p.Replica.GetID()),
			zap.String("channel", p.Segment.GetInsertChannel()),
			zap.String("level", p.Segment.GetLevel().String()),
			zap.Int64("from", p.From),
			zap.Int64("to", p.To))
		if task.GetTaskType(t) == task.TaskTypeMove {
			// from balance checker
			t.SetPriority(task.TaskPriorityLow)
		} else {
			// from segment checker
			t.SetPriority(task.TaskPriorityNormal)
		}
		ret = append(ret, t)
	}
	return ret
}
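
// CreateChannelTasksFromPlans converts channel assignment plans into channel
// tasks. Each plan yields a grow action on the target node and/or a reduce
// action on the source node, and every resulting task is scheduled at high
// priority.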
func CreateChannelTasksFromPlans(ctx context.Context, source task.Source, timeout time.Duration, plans []ChannelAssignPlan) []task.Task {
	ret := make([]task.Task, 0, len(plans))
	for _, p := range plans {
		actions := make([]task.Action, 0)
		if p.To != -1 {
			action := task.NewChannelAction(p.To, task.ActionTypeGrow, p.Channel.GetChannelName())
			actions = append(actions, action)
		}
		if p.From != -1 {
			action := task.NewChannelAction(p.From, task.ActionTypeReduce, p.Channel.GetChannelName())
			actions = append(actions, action)
		}
		t, err := task.NewChannelTask(ctx, timeout, source, p.Channel.GetCollectionID(), p.Replica, actions...)
		if err != nil {
			log.Warn("create channel task failed",
				zap.Int64("collection", p.Channel.GetCollectionID()),
				zap.Int64("replica", p.Replica.GetID()),
				zap.String("channel", p.Channel.GetChannelName()),
				zap.Int64("from", p.From),
				zap.Int64("to", p.To),
				zap.Error(err),
			)
			continue
		}

		log.Info("create channel task",
			zap.Int64("collection", p.Channel.GetCollectionID()),
			zap.Int64("replica", p.Replica.GetID()),
			zap.String("channel", p.Channel.GetChannelName()),
			zap.Int64("from", p.From),
			zap.Int64("to", p.To))
		t.SetPriority(task.TaskPriorityHigh)
		ret = append(ret, t)
	}
	return ret
}
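
// PrintNewBalancePlans logs all segment and channel plans produced for one
// replica in a single line prefixed with PlanInfoPrefix.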
func PrintNewBalancePlans(collectionID int64, replicaID int64, segmentPlans []SegmentAssignPlan,
	channelPlans []ChannelAssignPlan,
) {
	balanceInfo := fmt.Sprintf("%s new plans:{collectionID:%d, replicaID:%d, ", PlanInfoPrefix, collectionID, replicaID)
	for _, segmentPlan := range segmentPlans {
		balanceInfo += segmentPlan.ToString()
	}
	for _, channelPlan := range channelPlans {
		balanceInfo += channelPlan.ToString()
	}
	balanceInfo += "}"
	log.Info(balanceInfo)
}
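
// PrintCurrentReplicaDist logs the replica's current segment and channel
// distribution, covering both stopping and normal nodes, in a single line
// prefixed with DistInfoPrefix.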
func PrintCurrentReplicaDist(replica *meta.Replica,
	stoppingNodesSegments map[int64][]*meta.Segment, nodeSegments map[int64][]*meta.Segment,
	channelManager *meta.ChannelDistManager, segmentDistMgr *meta.SegmentDistManager,
) {
	distInfo := fmt.Sprintf("%s {collectionID:%d, replicaID:%d, ", DistInfoPrefix, replica.GetCollectionID(), replica.GetID())
	// 1. print stopping nodes segment distribution
	distInfo += "[stoppingNodesSegmentDist:"
	for stoppingNodeID, stoppedSegments := range stoppingNodesSegments {
		distInfo += fmt.Sprintf("[nodeID:%d, ", stoppingNodeID)
		distInfo += "stopped-segments:["
		for _, stoppedSegment := range stoppedSegments {
			distInfo += fmt.Sprintf("%d,", stoppedSegment.GetID())
		}
		distInfo += "]]"
	}
	distInfo += "]"
	// 2. print normal nodes segment distribution
	distInfo += "[normalNodesSegmentDist:"
	for normalNodeID, normalNodeCollectionSegments := range nodeSegments {
		distInfo += fmt.Sprintf("[nodeID:%d, ", normalNodeID)
		distInfo += "loaded-segments:["
		nodeRowSum := int64(0)
		normalNodeSegments := segmentDistMgr.GetByFilter(meta.WithNodeID(normalNodeID))
		for _, normalNodeSegment := range normalNodeSegments {
			nodeRowSum += normalNodeSegment.GetNumOfRows()
		}
		nodeCollectionRowSum := int64(0)
		for _, normalCollectionSegment := range normalNodeCollectionSegments {
			distInfo += fmt.Sprintf("[segmentID: %d, rowCount: %d] ",
				normalCollectionSegment.GetID(), normalCollectionSegment.GetNumOfRows())
			nodeCollectionRowSum += normalCollectionSegment.GetNumOfRows()
		}
		distInfo += fmt.Sprintf("] nodeRowSum:%d nodeCollectionRowSum:%d]", nodeRowSum, nodeCollectionRowSum)
	}
	distInfo += "]"

	// 3. print stopping nodes channel distribution
	distInfo += "[stoppingNodesChannelDist:"
	for stoppingNodeID := range stoppingNodesSegments {
		stoppingNodeChannels := channelManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(stoppingNodeID))
		distInfo += fmt.Sprintf("[nodeID:%d, count:%d,", stoppingNodeID, len(stoppingNodeChannels))
		distInfo += "channels:["
		for _, stoppingChan := range stoppingNodeChannels {
			distInfo += fmt.Sprintf("%s,", stoppingChan.GetChannelName())
		}
		distInfo += "]]"
	}
	distInfo += "]"

	// 4. print normal nodes channel distribution
	distInfo += "[normalNodesChannelDist:"
	for normalNodeID := range nodeSegments {
		normalNodeChannels := channelManager.GetByCollectionAndFilter(replica.GetCollectionID(), meta.WithNodeID2Channel(normalNodeID))
		distInfo += fmt.Sprintf("[nodeID:%d, count:%d,", normalNodeID, len(normalNodeChannels))
		distInfo += "channels:["
		for _, normalNodeChan := range normalNodeChannels {
			distInfo += fmt.Sprintf("%s,", normalNodeChan.GetChannelName())
		}
		distInfo += "]]"
	}
	distInfo += "]"

	log.Info(distInfo)
}