// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package task

import (
	"context"
	"sync"
	"time"

	"github.com/blang/semver/v4"
	"github.com/cockroachdb/errors"
	"github.com/samber/lo"
	"go.uber.org/atomic"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
	"github.com/milvus-io/milvus/internal/proto/indexpb"
	"github.com/milvus-io/milvus/internal/proto/querypb"
	"github.com/milvus-io/milvus/internal/querycoordv2/meta"
	. "github.com/milvus-io/milvus/internal/querycoordv2/params"
	"github.com/milvus-io/milvus/internal/querycoordv2/session"
	"github.com/milvus-io/milvus/internal/querycoordv2/utils"
	"github.com/milvus-io/milvus/pkg/log"
	"github.com/milvus-io/milvus/pkg/util/commonpbutil"
	"github.com/milvus-io/milvus/pkg/util/funcutil"
	"github.com/milvus-io/milvus/pkg/util/indexparams"
	"github.com/milvus-io/milvus/pkg/util/merr"
	"github.com/milvus-io/milvus/pkg/util/tsoutil"
	"github.com/milvus-io/milvus/pkg/util/typeutil"
)

// segmentsVersion is the node version starting from which flushed segments
// should not be included in the watch dm channel request.
var segmentsVersion = semver.Version{
	Major: 2,
	Minor: 3,
	Patch: 4,
}

type Executor struct {
	doneCh    chan struct{}
	wg        sync.WaitGroup
	meta      *meta.Meta
	dist      *meta.DistributionManager
	broker    meta.Broker
	targetMgr meta.TargetManagerInterface
	cluster   session.Cluster
	nodeMgr   *session.NodeManager

	executingTasks   *typeutil.ConcurrentSet[string] // task index
	executingTaskNum atomic.Int32
	executedFlag     chan struct{}
}
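
// NewExecutor creates an Executor that executes task actions against the
// cluster using the given meta, distribution, broker, target manager and
// node manager.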
func NewExecutor(meta *meta.Meta,
	dist *meta.DistributionManager,
	broker meta.Broker,
	targetMgr meta.TargetManagerInterface,
	cluster session.Cluster,
	nodeMgr *session.NodeManager,
) *Executor {
	return &Executor{
		doneCh:    make(chan struct{}),
		meta:      meta,
		dist:      dist,
		broker:    broker,
		targetMgr: targetMgr,
		cluster:   cluster,
		nodeMgr:   nodeMgr,

		executingTasks: typeutil.NewConcurrentSet[string](),
		executedFlag:   make(chan struct{}, 1),
	}
}
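
// Start starts the executor; it is currently a no-op since actions are
// executed on demand via Execute.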
func (ex *Executor) Start(ctx context.Context) {
}
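
// Stop waits for all goroutines tracked by the executor's wait group to finish.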
func (ex *Executor) Stop() {
	ex.wg.Wait()
}

// Execute executes the given action;
// it does nothing and returns false if the action is already committed,
// and returns true otherwise.
func (ex *Executor) Execute(task Task, step int) bool {
	exist := !ex.executingTasks.Insert(task.Index())
	if exist {
		return false
	}
	if ex.executingTaskNum.Inc() > Params.QueryCoordCfg.TaskExecutionCap.GetAsInt32() {
		ex.executingTasks.Remove(task.Index())
		ex.executingTaskNum.Dec()
		return false
	}

	log := log.With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.Int("step", step),
		zap.String("source", task.Source().String()),
	)

	go func() {
		log.Info("execute the action of task")
		switch task.Actions()[step].(type) {
		case *SegmentAction:
			ex.executeSegmentAction(task.(*SegmentTask), step)

		case *ChannelAction:
			ex.executeDmChannelAction(task.(*ChannelTask), step)

		case *LeaderAction:
			ex.executeLeaderAction(task.(*LeaderTask), step)
		}
	}()

	return true
}
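
// GetExecutedFlag returns a channel that receives a signal whenever a task
// action finishes without error.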
func (ex *Executor) GetExecutedFlag() <-chan struct{} {
	return ex.executedFlag
}
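
// removeTask finishes the given step of the task: it logs the error if the
// task failed, signals the executed flag otherwise, and releases the slot in
// executingTasks and executingTaskNum.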
func (ex *Executor) removeTask(task Task, step int) {
	if task.Err() != nil {
		log.Info("execute action done, remove it",
			zap.Int64("taskID", task.ID()),
			zap.Int("step", step),
			zap.Error(task.Err()))
	} else {
		select {
		case ex.executedFlag <- struct{}{}:
		default:
		}
	}

	ex.executingTasks.Remove(task.Index())
	ex.executingTaskNum.Dec()
}
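
// executeSegmentAction dispatches a segment action to loadSegment or
// releaseSegment according to the action type.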
func (ex *Executor) executeSegmentAction(task *SegmentTask, step int) {
	switch task.Actions()[step].Type() {
	case ActionTypeGrow, ActionTypeUpdate:
		ex.loadSegment(task, step)

	case ActionTypeReduce:
		ex.releaseSegment(task, step)
	}
}

// loadSegment builds a LoadSegments request for the segment and sends it to
// the shard leader (delegator) serving the segment's channel.
func (ex *Executor) loadSegment(task *SegmentTask, step int) error {
	action := task.Actions()[step].(*SegmentAction)
	defer action.rpcReturned.Store(true)
	ctx := task.Context()
	log := log.Ctx(ctx).With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.Int64("segmentID", task.segmentID),
		zap.Int64("node", action.Node()),
		zap.String("source", task.Source().String()),
	)

	var err error
	defer func() {
		if err != nil {
			task.Fail(err)
		}
		ex.removeTask(task, step)
	}()

	collectionInfo, loadMeta, channel, err := ex.getMetaInfo(ctx, task)
	if err != nil {
		return err
	}

	loadInfo, indexInfos, err := ex.getLoadInfo(ctx, task.CollectionID(), action.SegmentID(), channel)
	if err != nil {
		return err
	}

	req := packLoadSegmentRequest(
		task,
		action,
		collectionInfo.GetSchema(),
		collectionInfo.GetProperties(),
		loadMeta,
		loadInfo,
		indexInfos,
	)

	// get the segment's replica first, then get the shard leader by replica
	replica := ex.meta.ReplicaManager.GetByCollectionAndNode(task.CollectionID(), action.Node())
	if replica == nil {
		msg := "node doesn't belong to any replica"
		err := merr.WrapErrNodeNotAvailable(action.Node())
		log.Warn(msg, zap.Error(err))
		return err
	}
	view := ex.dist.LeaderViewManager.GetLatestShardLeaderByFilter(meta.WithReplica2LeaderView(replica), meta.WithChannelName2LeaderView(action.Shard()))
	if view == nil {
		msg := "no shard leader for the segment to execute loading"
		err = merr.WrapErrChannelNotFound(task.Shard(), "shard delegator not found")
		log.Warn(msg, zap.Error(err))
		return err
	}
	log = log.With(zap.Int64("shardLeader", view.ID))

	startTs := time.Now()
	log.Info("load segments...")
	status, err := ex.cluster.LoadSegments(task.Context(), view.ID, req)
	err = merr.CheckRPCCall(status, err)
	if err != nil {
		log.Warn("failed to load segment", zap.Error(err))
		return err
	}

	elapsed := time.Since(startTs)
	log.Info("load segments done", zap.Duration("elapsed", elapsed))

	return nil
}
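
// releaseSegment sends a ReleaseSegments request for the segment, routing it
// through the shard leader when the distribution change needs to be
// transferred to the delegator.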
func (ex *Executor) releaseSegment(task *SegmentTask, step int) {
	defer ex.removeTask(task, step)
	startTs := time.Now()
	action := task.Actions()[step].(*SegmentAction)
	defer action.rpcReturned.Store(true)

	log := log.With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.Int64("segmentID", task.segmentID),
		zap.Int64("node", action.Node()),
		zap.String("source", task.Source().String()),
	)

	ctx := task.Context()

	dstNode := action.Node()

	req := packReleaseSegmentRequest(task, action)
	channel := ex.targetMgr.GetDmChannel(task.CollectionID(), task.Shard(), meta.CurrentTarget)
	if channel != nil {
		// if the channel exists in the current target, set its checkpoint on the
		// ReleaseSegmentRequest; it is needed as the growing segments' exclude ts
		req.Checkpoint = channel.GetSeekPosition()
	}

	if action.Scope() == querypb.DataScope_Streaming {
		// Any modification to the segment distribution has to set NeedTransfer to true,
		// to protect the version, which serves search/query
		req.NeedTransfer = true
	} else {
		req.Shard = task.shard

		if ex.meta.CollectionManager.Exist(task.CollectionID()) {
			// get the segment's replica first, then get the shard leader by replica
			replica := ex.meta.ReplicaManager.GetByCollectionAndNode(task.CollectionID(), action.Node())
			if replica == nil {
				msg := "node doesn't belong to any replica"
				err := merr.WrapErrNodeNotAvailable(action.Node())
				log.Warn(msg, zap.Error(err))
				return
			}
			view := ex.dist.LeaderViewManager.GetLatestShardLeaderByFilter(meta.WithReplica2LeaderView(replica), meta.WithChannelName2LeaderView(action.Shard()))
			if view == nil {
				msg := "no shard leader for the segment to execute releasing"
				err := merr.WrapErrChannelNotFound(task.Shard(), "shard delegator not found")
				log.Warn(msg, zap.Error(err))
				return
			}

			dstNode = view.ID
			log = log.With(zap.Int64("shardLeader", view.ID))
			req.NeedTransfer = true
		}
	}

	log.Info("release segment...")
	status, err := ex.cluster.ReleaseSegments(ctx, dstNode, req)
	err = merr.CheckRPCCall(status, err)
	if err != nil {
		log.Warn("failed to release segment", zap.Error(err))
		return
	}
	elapsed := time.Since(startTs)
	log.Info("release segment done", zap.Int64("taskID", task.ID()), zap.Duration("time taken", elapsed))
}
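
// executeDmChannelAction dispatches a DM channel action to subscribeChannel or
// unsubscribeChannel according to the action type.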
func (ex *Executor) executeDmChannelAction(task *ChannelTask, step int) {
	switch task.Actions()[step].Type() {
	case ActionTypeGrow:
		ex.subscribeChannel(task, step)

	case ActionTypeReduce:
		ex.unsubscribeChannel(task, step)
	}
}
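
// subscribeChannel packs a WatchDmChannels request with the collection schema,
// load meta and index info, fills it with segment info, and sends it to the
// node picked by the action.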
func (ex *Executor) subscribeChannel(task *ChannelTask, step int) error {
	defer ex.removeTask(task, step)
	startTs := time.Now()
	action := task.Actions()[step].(*ChannelAction)
	log := log.With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.String("channel", task.Channel()),
		zap.Int64("node", action.Node()),
		zap.String("source", task.Source().String()),
	)

	var err error
	defer func() {
		if err != nil {
			task.Fail(err)
		}
	}()

	ctx := task.Context()

	collectionInfo, err := ex.broker.DescribeCollection(ctx, task.CollectionID())
	if err != nil {
		log.Warn("failed to get collection info")
		return err
	}
	partitions, err := utils.GetPartitions(ex.meta.CollectionManager, task.CollectionID())
	if err != nil {
		log.Warn("failed to get partitions of collection")
		return err
	}
	indexInfo, err := ex.broker.ListIndexes(ctx, task.CollectionID())
	if err != nil {
		log.Warn("fail to get index meta of collection")
		return err
	}
	loadMeta := packLoadMeta(
		ex.meta.GetLoadType(task.CollectionID()),
		task.CollectionID(),
		collectionInfo.GetDbName(),
		task.ResourceGroup(),
		partitions...,
	)

	dmChannel := ex.targetMgr.GetDmChannel(task.CollectionID(), action.ChannelName(), meta.NextTarget)
	if dmChannel == nil {
		msg := "channel does not exist in next target, skip it"
		log.Warn(msg, zap.String("channelName", action.ChannelName()))
		return merr.WrapErrChannelReduplicate(action.ChannelName())
	}
	req := packSubChannelRequest(
		task,
		action,
		collectionInfo.GetSchema(),
		loadMeta,
		dmChannel,
		indexInfo,
	)
	err = fillSubChannelRequest(ctx, req, ex.broker, ex.shouldIncludeFlushedSegmentInfo(action.Node()))
	if err != nil {
		log.Warn("failed to subscribe channel, failed to fill the request with segments",
			zap.Error(err))
		return err
	}

	ts := dmChannel.GetSeekPosition().GetTimestamp()
	log.Info("subscribe channel...",
		zap.Uint64("checkpoint", ts),
		zap.Duration("sinceCheckpoint", time.Since(tsoutil.PhysicalTime(ts))),
	)
	status, err := ex.cluster.WatchDmChannels(ctx, action.Node(), req)
	if err != nil {
		log.Warn("failed to subscribe channel, it may be a false failure", zap.Error(err))
		return err
	}
	if !merr.Ok(status) {
		err = merr.Error(status)
		log.Warn("failed to subscribe channel", zap.Error(err))
		return err
	}
	elapsed := time.Since(startTs)
	log.Info("subscribe channel done", zap.Int64("taskID", task.ID()), zap.Duration("time taken", elapsed))
	return nil
}
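
// shouldIncludeFlushedSegmentInfo reports whether the node runs a version older
// than segmentsVersion and therefore still needs flushed segment info in the
// watch dm channel request.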
func (ex *Executor) shouldIncludeFlushedSegmentInfo(nodeID int64) bool {
	node := ex.nodeMgr.Get(nodeID)
	if node == nil {
		return false
	}
	return node.Version().LT(segmentsVersion)
}
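
// unsubscribeChannel sends an UnsubDmChannel request to the node picked by the
// action to stop consuming the channel.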
func (ex *Executor) unsubscribeChannel(task *ChannelTask, step int) error {
	defer ex.removeTask(task, step)
	startTs := time.Now()
	action := task.Actions()[step].(*ChannelAction)
	log := log.With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.String("channel", task.Channel()),
		zap.Int64("node", action.Node()),
		zap.String("source", task.Source().String()),
	)

	var err error
	defer func() {
		if err != nil {
			task.Fail(err)
		}
	}()

	ctx := task.Context()

	req := packUnsubDmChannelRequest(task, action)
	log.Info("unsubscribe channel...")
	status, err := ex.cluster.UnsubDmChannel(ctx, action.Node(), req)
	if err != nil {
		log.Warn("failed to unsubscribe channel, it may be a false failure", zap.Error(err))
		return err
	}
	if !merr.Ok(status) {
		err = merr.Error(status)
		log.Warn("failed to unsubscribe channel", zap.Error(err))
		return err
	}

	elapsed := time.Since(startTs)
	log.Info("unsubscribe channel done", zap.Int64("taskID", task.ID()), zap.Duration("time taken", elapsed))
	return nil
}
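
// executeLeaderAction dispatches a leader action to setDistribution,
// removeDistribution or updatePartStatsVersions according to the action type.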
func (ex *Executor) executeLeaderAction(task *LeaderTask, step int) {
	switch task.Actions()[step].Type() {
	case ActionTypeGrow:
		ex.setDistribution(task, step)

	case ActionTypeReduce:
		ex.removeDistribution(task, step)

	case ActionTypeUpdate:
		ex.updatePartStatsVersions(task, step)
	}
}
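
// updatePartStatsVersions syncs the partition stats versions carried by the
// action to the shard leader through a SyncDistribution request.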
func (ex *Executor) updatePartStatsVersions(task *LeaderTask, step int) error {
	action := task.Actions()[step].(*LeaderAction)
	defer action.rpcReturned.Store(true)
	ctx := task.Context()
	log := log.Ctx(ctx).With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.Int64("leader", action.leaderID),
		zap.Int64("node", action.Node()),
		zap.String("source", task.Source().String()),
	)
	var err error
	defer func() {
		if err != nil {
			task.Fail(err)
		}
		ex.removeTask(task, step)
	}()

	req := &querypb.SyncDistributionRequest{
		Base: commonpbutil.NewMsgBase(
			commonpbutil.WithMsgType(commonpb.MsgType_SyncDistribution),
			commonpbutil.WithMsgID(task.ID()),
		),
		CollectionID: task.collectionID,
		Channel:      task.Shard(),
		ReplicaID:    task.ReplicaID(),
		Actions: []*querypb.SyncAction{
			{
				Type:                   querypb.SyncType_UpdatePartitionStats,
				SegmentID:              action.SegmentID(),
				NodeID:                 action.Node(),
				Version:                action.Version(),
				PartitionStatsVersions: action.partStatsVersions,
			},
		},
	}
	startTs := time.Now()
	log.Debug("Update partition stats versions...")
	status, err := ex.cluster.SyncDistribution(task.Context(), task.leaderID, req)
	err = merr.CheckRPCCall(status, err)
	if err != nil {
		log.Warn("failed to update partition stats versions", zap.Error(err))
		return err
	}

	elapsed := time.Since(startTs)
	log.Debug("update partition stats done", zap.Duration("elapsed", elapsed))

	return nil
}
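
// setDistribution notifies the shard leader to serve the given segment on the
// worker node by sending a SyncDistribution request with a Set action.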
func (ex *Executor) setDistribution(task *LeaderTask, step int) error {
	action := task.Actions()[step].(*LeaderAction)
	defer action.rpcReturned.Store(true)
	ctx := task.Context()
	log := log.Ctx(ctx).With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.Int64("segmentID", task.segmentID),
		zap.Int64("leader", action.leaderID),
		zap.Int64("node", action.Node()),
		zap.String("source", task.Source().String()),
	)

	var err error
	defer func() {
		if err != nil {
			task.Fail(err)
		}
		ex.removeTask(task, step)
	}()

	collectionInfo, loadMeta, channel, err := ex.getMetaInfo(ctx, task)
	if err != nil {
		return err
	}

	loadInfo, indexInfo, err := ex.getLoadInfo(ctx, task.CollectionID(), action.SegmentID(), channel)
	if err != nil {
		return err
	}

	req := &querypb.SyncDistributionRequest{
		Base: commonpbutil.NewMsgBase(
			commonpbutil.WithMsgType(commonpb.MsgType_LoadSegments),
			commonpbutil.WithMsgID(task.ID()),
		),
		CollectionID: task.collectionID,
		Channel:      task.Shard(),
		Schema:       collectionInfo.GetSchema(),
		LoadMeta:     loadMeta,
		ReplicaID:    task.ReplicaID(),
		Actions: []*querypb.SyncAction{
			{
				Type:        querypb.SyncType_Set,
				PartitionID: loadInfo.GetPartitionID(),
				SegmentID:   action.SegmentID(),
				NodeID:      action.Node(),
				Info:        loadInfo,
				Version:     action.Version(),
			},
		},
		IndexInfoList: indexInfo,
	}

	startTs := time.Now()
	log.Info("Sync Distribution...")
	status, err := ex.cluster.SyncDistribution(task.Context(), task.leaderID, req)
	err = merr.CheckRPCCall(status, err)
	if err != nil {
		log.Warn("failed to sync distribution", zap.Error(err))
		return err
	}

	elapsed := time.Since(startTs)
	log.Info("sync distribution done", zap.Duration("elapsed", elapsed))

	return nil
}
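
// removeDistribution notifies the shard leader to stop serving the given
// segment by sending a SyncDistribution request with a Remove action.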
func (ex *Executor) removeDistribution(task *LeaderTask, step int) error {
	action := task.Actions()[step].(*LeaderAction)
	defer action.rpcReturned.Store(true)
	ctx := task.Context()
	log := log.Ctx(ctx).With(
		zap.Int64("taskID", task.ID()),
		zap.Int64("collectionID", task.CollectionID()),
		zap.Int64("replicaID", task.ReplicaID()),
		zap.Int64("segmentID", task.segmentID),
		zap.Int64("leader", action.leaderID),
		zap.Int64("node", action.Node()),
		zap.String("source", task.Source().String()),
	)

	var err error
	defer func() {
		if err != nil {
			task.Fail(err)
		}
		ex.removeTask(task, step)
	}()

	req := &querypb.SyncDistributionRequest{
		Base: commonpbutil.NewMsgBase(
			commonpbutil.WithMsgType(commonpb.MsgType_SyncDistribution),
			commonpbutil.WithMsgID(task.ID()),
		),
		CollectionID: task.collectionID,
		Channel:      task.Shard(),
		ReplicaID:    task.ReplicaID(),
		Actions: []*querypb.SyncAction{
			{
				Type:      querypb.SyncType_Remove,
				SegmentID: action.SegmentID(),
				NodeID:    action.Node(),
			},
		},
	}

	startTs := time.Now()
	log.Info("Remove Distribution...")
	status, err := ex.cluster.SyncDistribution(task.Context(), task.leaderID, req)
	err = merr.CheckRPCCall(status, err)
	if err != nil {
		log.Warn("failed to remove distribution", zap.Error(err))
		return err
	}

	elapsed := time.Since(startTs)
	log.Info("remove distribution done", zap.Duration("elapsed", elapsed))

	return nil
}
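
// getMetaInfo fetches the collection info, packs the load meta, and resolves
// the DM channel of the task's shard from the target manager.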
func (ex *Executor) getMetaInfo(ctx context.Context, task Task) (*milvuspb.DescribeCollectionResponse, *querypb.LoadMetaInfo, *meta.DmChannel, error) {
	collectionID := task.CollectionID()
	shard := task.Shard()
	log := log.Ctx(ctx)
	collectionInfo, err := ex.broker.DescribeCollection(ctx, collectionID)
	if err != nil {
		log.Warn("failed to get collection info", zap.Error(err))
		return nil, nil, nil, err
	}
	partitions, err := utils.GetPartitions(ex.meta.CollectionManager, collectionID)
	if err != nil {
		log.Warn("failed to get partitions of collection", zap.Error(err))
		return nil, nil, nil, err
	}

	loadMeta := packLoadMeta(
		ex.meta.GetLoadType(task.CollectionID()),
		task.CollectionID(),
		collectionInfo.GetDbName(),
		task.ResourceGroup(),
		partitions...,
	)

	// get the channel first, in case the target is updated after the segment info is fetched
	channel := ex.targetMgr.GetDmChannel(collectionID, shard, meta.NextTargetFirst)
	if channel == nil {
		return nil, nil, nil, merr.WrapErrChannelNotAvailable(shard)
	}

	return collectionInfo, loadMeta, channel, nil
}
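
// getLoadInfo fetches the segment and index info for the segment, merges the
// configurable user index params into the segment index params, and packs the
// SegmentLoadInfo used by load and sync requests.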
func (ex *Executor) getLoadInfo(ctx context.Context, collectionID, segmentID int64, channel *meta.DmChannel) (*querypb.SegmentLoadInfo, []*indexpb.IndexInfo, error) {
	log := log.Ctx(ctx)
	segmentInfos, err := ex.broker.GetSegmentInfo(ctx, segmentID)
	if err != nil || len(segmentInfos) == 0 {
		log.Warn("failed to get segment info from DataCoord", zap.Error(err))
		return nil, nil, err
	}
	segment := segmentInfos[0]
	log = log.With(zap.String("level", segment.GetLevel().String()))

	indexes, err := ex.broker.GetIndexInfo(ctx, collectionID, segment.GetID())
	if err != nil {
		if !errors.Is(err, merr.ErrIndexNotFound) {
			log.Warn("failed to get index of segment", zap.Error(err))
			return nil, nil, err
		}
		indexes = nil
	}

	// Get collection index info
	indexInfos, err := ex.broker.ListIndexes(ctx, collectionID)
	if err != nil {
		log.Warn("fail to get index meta of collection", zap.Error(err))
		return nil, nil, err
	}
	// update the field index params
	for _, segmentIndex := range indexes {
		index, found := lo.Find(indexInfos, func(indexInfo *indexpb.IndexInfo) bool {
			return indexInfo.IndexID == segmentIndex.IndexID
		})
		if !found {
			log.Warn("no collection index info for the given segment index", zap.String("indexName", segmentIndex.GetIndexName()))
		}

		params := funcutil.KeyValuePair2Map(segmentIndex.GetIndexParams())
		for _, kv := range index.GetUserIndexParams() {
			if indexparams.IsConfigableIndexParam(kv.GetKey()) {
				params[kv.GetKey()] = kv.GetValue()
			}
		}
		segmentIndex.IndexParams = funcutil.Map2KeyValuePair(params)
	}

	loadInfo := utils.PackSegmentLoadInfo(segment, channel.GetSeekPosition(), indexes)
	return loadInfo, indexInfos, nil
}