2021-04-19 13:47:10 +08:00
// Copyright (C) 2019-2020 Zilliz. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations under the License.
2021-06-22 16:44:09 +08:00
package querycoord
2021-04-15 15:15:46 +08:00
import (
"context"
2021-06-23 17:44:12 +08:00
"errors"
2021-04-15 15:15:46 +08:00
"fmt"
2021-10-11 09:54:37 +08:00
"sync"
2021-06-30 17:48:19 +08:00
"time"
2021-04-15 15:15:46 +08:00
2021-06-19 11:45:09 +08:00
"github.com/golang/protobuf/proto"
2021-04-15 15:15:46 +08:00
"go.uber.org/zap"
2021-04-22 14:45:57 +08:00
"github.com/milvus-io/milvus/internal/log"
"github.com/milvus-io/milvus/internal/proto/commonpb"
"github.com/milvus-io/milvus/internal/proto/datapb"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/milvuspb"
2021-07-02 10:40:13 +08:00
"github.com/milvus-io/milvus/internal/proto/proxypb"
2021-04-22 14:45:57 +08:00
"github.com/milvus-io/milvus/internal/proto/querypb"
"github.com/milvus-io/milvus/internal/types"
2021-06-30 16:18:13 +08:00
"github.com/milvus-io/milvus/internal/util/trace"
"github.com/opentracing/opentracing-go"
2021-04-15 15:15:46 +08:00
)
2021-06-19 11:45:09 +08:00
// Key prefixes used by queryCoord when naming task-related entries.
const (
	// triggerTaskPrefix prefixes keys of trigger tasks.
	triggerTaskPrefix = "queryCoord-triggerTask"
	// activeTaskPrefix prefixes keys of active tasks.
	activeTaskPrefix = "queryCoord-activeTask"
	// taskInfoPrefix prefixes keys holding task info.
	taskInfoPrefix = "queryCoord-taskInfo"
	// loadBalanceInfoPrefix prefixes keys holding load balance info.
	loadBalanceInfoPrefix = "queryCoord-loadBalanceInfo"
)
2021-10-11 09:54:37 +08:00
const (
	// MaxRetryNum is the maximum number of times that each task can be retried.
	MaxRetryNum = 5

	// MaxSendSizeToEtcd is the size limit applied to messages sent to etcd.
	// It limits the size of every loadSegmentReq to 200k; the previous value
	// was 2097152.
	MaxSendSizeToEtcd = 200000
)
2021-06-19 11:45:09 +08:00
// taskState describes where a task currently is in its life cycle.
type taskState int

// Possible task states. The numeric values are explicit (note that 2 is not
// used), so they must not be replaced by iota.
const (
	taskUndo    taskState = 0
	taskDoing   taskState = 1
	taskDone    taskState = 3
	taskExpired taskState = 4
	taskFailed  taskState = 5
)
2021-04-15 15:15:46 +08:00
type task interface {
2021-10-14 20:18:33 +08:00
traceCtx ( ) context . Context
getTaskID ( ) UniqueID // return ReqId
setTaskID ( id UniqueID )
msgBase ( ) * commonpb . MsgBase
msgType ( ) commonpb . MsgType
timestamp ( ) Timestamp
getTriggerCondition ( ) querypb . TriggerCondition
preExecute ( ctx context . Context ) error
execute ( ctx context . Context ) error
postExecute ( ctx context . Context ) error
reschedule ( ctx context . Context ) ( [ ] task , error )
rollBack ( ctx context . Context ) [ ] task
waitToFinish ( ) error
notify ( err error )
taskPriority ( ) querypb . TriggerCondition
setParentTask ( t task )
getParentTask ( ) task
getChildTask ( ) [ ] task
addChildTask ( t task )
removeChildTaskByID ( taskID UniqueID )
isValid ( ) bool
marshal ( ) ( [ ] byte , error )
getState ( ) taskState
setState ( state taskState )
isRetryable ( ) bool
setResultInfo ( err error )
getResultInfo ( ) * commonpb . Status
updateTaskProcess ( )
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
type baseTask struct {
2021-10-21 10:51:14 +08:00
condition
2021-10-11 09:54:37 +08:00
ctx context . Context
cancel context . CancelFunc
result * commonpb . Status
resultMu sync . RWMutex
state taskState
stateMu sync . RWMutex
retryCount int
//sync.RWMutex
2021-06-15 12:41:40 +08:00
taskID UniqueID
triggerCondition querypb . TriggerCondition
parentTask task
childTasks [ ] task
2021-10-11 09:54:37 +08:00
childTasksMu sync . RWMutex
}
2021-10-18 21:34:47 +08:00
func newBaseTask ( ctx context . Context , triggerType querypb . TriggerCondition ) * baseTask {
2021-10-11 09:54:37 +08:00
childCtx , cancel := context . WithCancel ( ctx )
2021-10-21 10:51:14 +08:00
condition := newTaskCondition ( childCtx )
2021-10-11 09:54:37 +08:00
2021-10-18 21:34:47 +08:00
baseTask := & baseTask {
2021-10-11 09:54:37 +08:00
ctx : childCtx ,
cancel : cancel ,
2021-10-21 10:51:14 +08:00
condition : condition ,
2021-10-11 09:54:37 +08:00
state : taskUndo ,
retryCount : MaxRetryNum ,
triggerCondition : triggerType ,
childTasks : [ ] task { } ,
}
return baseTask
2021-06-15 12:41:40 +08:00
}
2021-10-14 20:18:33 +08:00
// getTaskID function returns the unique taskID of the trigger task
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) getTaskID ( ) UniqueID {
2021-06-15 12:41:40 +08:00
return bt . taskID
}
2021-10-14 20:18:33 +08:00
// setTaskID function sets the trigger task with a unique id, which is allocated by tso
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) setTaskID ( id UniqueID ) {
2021-06-15 12:41:40 +08:00
bt . taskID = id
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) traceCtx ( ) context . Context {
2021-04-15 15:15:46 +08:00
return bt . ctx
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) getTriggerCondition ( ) querypb . TriggerCondition {
2021-10-11 09:54:37 +08:00
return bt . triggerCondition
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) taskPriority ( ) querypb . TriggerCondition {
2021-06-15 12:41:40 +08:00
return bt . triggerCondition
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) setParentTask ( t task ) {
2021-10-11 09:54:37 +08:00
bt . parentTask = t
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) getParentTask ( ) task {
2021-06-15 12:41:40 +08:00
return bt . parentTask
}
2021-10-13 21:26:33 +08:00
// GetChildTask function returns all the child tasks of the trigger task
// Child task may be loadSegmentTask, watchDmChannelTask or watchQueryChannelTask
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) getChildTask ( ) [ ] task {
2021-10-11 09:54:37 +08:00
bt . childTasksMu . RLock ( )
defer bt . childTasksMu . RUnlock ( )
2021-06-15 12:41:40 +08:00
return bt . childTasks
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) addChildTask ( t task ) {
2021-10-11 09:54:37 +08:00
bt . childTasksMu . Lock ( )
defer bt . childTasksMu . Unlock ( )
2021-06-15 12:41:40 +08:00
bt . childTasks = append ( bt . childTasks , t )
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) removeChildTaskByID ( taskID UniqueID ) {
2021-10-11 09:54:37 +08:00
bt . childTasksMu . Lock ( )
defer bt . childTasksMu . Unlock ( )
result := make ( [ ] task , 0 )
for _ , t := range bt . childTasks {
2021-10-14 20:18:33 +08:00
if t . getTaskID ( ) != taskID {
2021-10-11 09:54:37 +08:00
result = append ( result , t )
}
}
bt . childTasks = result
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) isValid ( ) bool {
2021-06-19 11:45:09 +08:00
return true
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) reschedule ( ctx context . Context ) ( [ ] task , error ) {
2021-06-19 11:45:09 +08:00
return nil , nil
}
2021-10-12 23:40:36 +08:00
// State returns the state of task, such as taskUndo, taskDoing, taskDone, taskExpired, taskFailed
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) getState ( ) taskState {
2021-10-11 09:54:37 +08:00
bt . stateMu . RLock ( )
defer bt . stateMu . RUnlock ( )
2021-06-19 11:45:09 +08:00
return bt . state
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) setState ( state taskState ) {
2021-10-11 09:54:37 +08:00
bt . stateMu . Lock ( )
defer bt . stateMu . Unlock ( )
2021-06-19 11:45:09 +08:00
bt . state = state
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) isRetryable ( ) bool {
2021-10-11 09:54:37 +08:00
return bt . retryCount > 0
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) setResultInfo ( err error ) {
2021-10-11 09:54:37 +08:00
bt . resultMu . Lock ( )
defer bt . resultMu . Unlock ( )
if bt . result == nil {
bt . result = & commonpb . Status { }
}
if err == nil {
bt . result . ErrorCode = commonpb . ErrorCode_Success
bt . result . Reason = ""
return
}
bt . result . ErrorCode = commonpb . ErrorCode_UnexpectedError
bt . result . Reason = bt . result . Reason + ", " + err . Error ( )
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) getResultInfo ( ) * commonpb . Status {
2021-10-11 09:54:37 +08:00
bt . resultMu . RLock ( )
defer bt . resultMu . RUnlock ( )
return proto . Clone ( bt . result ) . ( * commonpb . Status )
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) updateTaskProcess ( ) {
2021-10-11 09:54:37 +08:00
// TODO::
}
2021-10-18 21:34:47 +08:00
func ( bt * baseTask ) rollBack ( ctx context . Context ) [ ] task {
2021-10-11 09:54:37 +08:00
//TODO::
return nil
}
2021-10-18 21:34:47 +08:00
type loadCollectionTask struct {
* baseTask
2021-04-15 15:15:46 +08:00
* querypb . LoadCollectionRequest
2021-06-21 18:22:13 +08:00
rootCoord types . RootCoord
dataCoord types . DataCoord
2021-09-15 20:40:07 +08:00
cluster Cluster
2021-08-02 22:39:25 +08:00
meta Meta
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return lct . Base
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( lct . LoadCollectionRequest )
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) msgType ( ) commonpb . MsgType {
2021-04-15 15:15:46 +08:00
return lct . Base . MsgType
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) timestamp ( ) Timestamp {
2021-04-15 15:15:46 +08:00
return lct . Base . Timestamp
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) updateTaskProcess ( ) {
2021-10-11 09:54:37 +08:00
collectionID := lct . CollectionID
2021-10-14 20:18:33 +08:00
childTasks := lct . getChildTask ( )
2021-10-11 09:54:37 +08:00
allDone := true
for _ , t := range childTasks {
2021-10-14 20:18:33 +08:00
if t . getState ( ) != taskDone {
2021-10-11 09:54:37 +08:00
allDone = false
}
}
if allDone {
err := lct . meta . setLoadPercentage ( collectionID , 0 , 100 , querypb . LoadType_loadCollection )
if err != nil {
log . Error ( "loadCollectionTask: set load percentage to meta's collectionInfo" , zap . Int64 ( "collectionID" , collectionID ) )
2021-10-14 20:18:33 +08:00
lct . setResultInfo ( err )
2021-10-11 09:54:37 +08:00
}
}
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) preExecute ( ctx context . Context ) error {
2021-04-15 15:15:46 +08:00
collectionID := lct . CollectionID
schema := lct . Schema
2021-10-14 20:18:33 +08:00
lct . setResultInfo ( nil )
2021-10-18 21:34:47 +08:00
log . Debug ( "start do loadCollectionTask" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , lct . getTaskID ( ) ) ,
2021-04-15 15:15:46 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Stringer ( "schema" , schema ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
lct . retryCount --
} ( )
2021-04-15 15:15:46 +08:00
collectionID := lct . CollectionID
showPartitionRequest := & milvuspb . ShowPartitionsRequest {
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_ShowPartitions ,
} ,
CollectionID : collectionID ,
}
2021-07-01 15:24:17 +08:00
showPartitionResponse , err := lct . rootCoord . ShowPartitions ( ctx , showPartitionRequest )
2021-04-15 15:15:46 +08:00
if err != nil {
2021-10-14 20:18:33 +08:00
lct . setResultInfo ( err )
2021-04-15 15:15:46 +08:00
return err
}
2021-06-24 21:10:13 +08:00
log . Debug ( "loadCollectionTask: get collection's all partitionIDs" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64s ( "partitionIDs" , showPartitionResponse . PartitionIDs ) )
partitionIDs := showPartitionResponse . PartitionIDs
2021-06-23 17:44:12 +08:00
toLoadPartitionIDs := make ( [ ] UniqueID , 0 )
2021-06-24 21:10:13 +08:00
hasCollection := lct . meta . hasCollection ( collectionID )
2021-06-26 16:08:11 +08:00
watchPartition := false
2021-06-24 21:10:13 +08:00
if hasCollection {
2021-06-26 16:08:11 +08:00
watchPartition = true
2021-08-02 22:39:25 +08:00
loadType , _ := lct . meta . getLoadType ( collectionID )
if loadType == querypb . LoadType_loadCollection {
2021-06-24 21:10:13 +08:00
for _ , partitionID := range partitionIDs {
hasReleasePartition := lct . meta . hasReleasePartition ( collectionID , partitionID )
if hasReleasePartition {
toLoadPartitionIDs = append ( toLoadPartitionIDs , partitionID )
}
}
} else {
for _ , partitionID := range partitionIDs {
hasPartition := lct . meta . hasPartition ( collectionID , partitionID )
if ! hasPartition {
toLoadPartitionIDs = append ( toLoadPartitionIDs , partitionID )
}
}
2021-06-23 17:44:12 +08:00
}
2021-06-24 21:10:13 +08:00
} else {
toLoadPartitionIDs = partitionIDs
2021-06-23 17:44:12 +08:00
}
2021-06-24 21:10:13 +08:00
2021-06-23 17:44:12 +08:00
log . Debug ( "loadCollectionTask: toLoadPartitionIDs" , zap . Int64s ( "partitionIDs" , toLoadPartitionIDs ) )
2021-06-24 21:10:13 +08:00
lct . meta . addCollection ( collectionID , lct . Schema )
2021-08-02 22:39:25 +08:00
lct . meta . setLoadType ( collectionID , querypb . LoadType_loadCollection )
2021-06-24 21:10:13 +08:00
for _ , id := range toLoadPartitionIDs {
lct . meta . addPartition ( collectionID , id )
}
2021-06-23 17:44:12 +08:00
2021-06-26 16:08:11 +08:00
loadSegmentReqs := make ( [ ] * querypb . LoadSegmentsRequest , 0 )
watchDmChannelReqs := make ( [ ] * querypb . WatchDmChannelsRequest , 0 )
2021-06-15 12:41:40 +08:00
channelsToWatch := make ( [ ] string , 0 )
segmentsToLoad := make ( [ ] UniqueID , 0 )
2021-06-23 17:44:12 +08:00
for _ , partitionID := range toLoadPartitionIDs {
2021-06-16 11:09:56 +08:00
getRecoveryInfoRequest := & datapb . GetRecoveryInfoRequest {
2021-06-15 12:41:40 +08:00
Base : lct . Base ,
CollectionID : collectionID ,
PartitionID : partitionID ,
}
2021-06-30 16:18:13 +08:00
recoveryInfo , err := lct . dataCoord . GetRecoveryInfo ( ctx , getRecoveryInfoRequest )
2021-06-15 12:41:40 +08:00
if err != nil {
2021-10-14 20:18:33 +08:00
lct . setResultInfo ( err )
2021-06-15 12:41:40 +08:00
return err
}
2021-04-15 15:15:46 +08:00
2021-06-15 12:41:40 +08:00
for _ , segmentBingLog := range recoveryInfo . Binlogs {
segmentID := segmentBingLog . SegmentID
segmentLoadInfo := & querypb . SegmentLoadInfo {
2021-06-26 16:08:11 +08:00
SegmentID : segmentID ,
2021-06-15 12:41:40 +08:00
PartitionID : partitionID ,
CollectionID : collectionID ,
2021-06-26 16:08:11 +08:00
BinlogPaths : segmentBingLog . FieldBinlogs ,
2021-09-07 11:35:18 +08:00
NumOfRows : segmentBingLog . NumOfRows ,
2021-10-22 14:31:13 +08:00
Statslogs : segmentBingLog . Statslogs ,
Deltalogs : segmentBingLog . Deltalogs ,
2021-04-15 15:15:46 +08:00
}
2021-06-26 16:08:11 +08:00
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lct . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_LoadSegments
2021-06-26 16:08:11 +08:00
loadSegmentReq := & querypb . LoadSegmentsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-26 16:08:11 +08:00
Infos : [ ] * querypb . SegmentLoadInfo { segmentLoadInfo } ,
Schema : lct . Schema ,
LoadCondition : querypb . TriggerCondition_grpcRequest ,
}
2021-06-15 12:41:40 +08:00
segmentsToLoad = append ( segmentsToLoad , segmentID )
2021-06-26 16:08:11 +08:00
loadSegmentReqs = append ( loadSegmentReqs , loadSegmentReq )
2021-04-15 15:15:46 +08:00
}
2021-06-15 12:41:40 +08:00
for _ , info := range recoveryInfo . Channels {
channel := info . ChannelName
2021-06-26 16:08:11 +08:00
if ! watchPartition {
merged := false
for index , channelName := range channelsToWatch {
if channel == channelName {
merged = true
oldInfo := watchDmChannelReqs [ index ] . Infos [ 0 ]
newInfo := mergeVChannelInfo ( oldInfo , info )
watchDmChannelReqs [ index ] . Infos = [ ] * datapb . VchannelInfo { newInfo }
break
}
}
if ! merged {
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lct . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_WatchDmChannels
2021-06-26 16:08:11 +08:00
watchRequest := & querypb . WatchDmChannelsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-26 16:08:11 +08:00
CollectionID : collectionID ,
Infos : [ ] * datapb . VchannelInfo { info } ,
Schema : lct . Schema ,
}
channelsToWatch = append ( channelsToWatch , channel )
watchDmChannelReqs = append ( watchDmChannelReqs , watchRequest )
}
} else {
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lct . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_WatchDmChannels
2021-06-15 12:41:40 +08:00
watchRequest := & querypb . WatchDmChannelsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-15 12:41:40 +08:00
CollectionID : collectionID ,
2021-06-26 16:08:11 +08:00
PartitionID : partitionID ,
2021-06-16 11:09:56 +08:00
Infos : [ ] * datapb . VchannelInfo { info } ,
2021-06-15 12:41:40 +08:00
Schema : lct . Schema ,
}
channelsToWatch = append ( channelsToWatch , channel )
2021-06-26 16:08:11 +08:00
watchDmChannelReqs = append ( watchDmChannelReqs , watchRequest )
2021-06-15 12:41:40 +08:00
}
2021-04-15 15:15:46 +08:00
}
}
2021-10-11 09:54:37 +08:00
err = assignInternalTask ( ctx , collectionID , lct , lct . meta , lct . cluster , loadSegmentReqs , watchDmChannelReqs , false )
2021-09-29 09:56:04 +08:00
if err != nil {
2021-10-11 09:54:37 +08:00
log . Warn ( "loadCollectionTask: assign child task failed" , zap . Int64 ( "collectionID" , collectionID ) )
2021-10-14 20:18:33 +08:00
lct . setResultInfo ( err )
2021-09-29 09:56:04 +08:00
return err
}
2021-06-26 16:08:11 +08:00
log . Debug ( "loadCollectionTask: assign child task done" , zap . Int64 ( "collectionID" , collectionID ) )
2021-06-15 12:41:40 +08:00
2021-04-15 15:15:46 +08:00
log . Debug ( "LoadCollection execute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , lct . getTaskID ( ) ) ,
2021-06-15 12:41:40 +08:00
zap . Int64 ( "collectionID" , collectionID ) )
2021-04-15 15:15:46 +08:00
return nil
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) postExecute ( ctx context . Context ) error {
2021-04-15 15:15:46 +08:00
collectionID := lct . CollectionID
2021-06-23 17:44:12 +08:00
if lct . result . ErrorCode != commonpb . ErrorCode_Success {
2021-10-11 09:54:37 +08:00
lct . childTasks = [ ] task { }
err := lct . meta . releaseCollection ( collectionID )
2021-06-30 17:48:19 +08:00
if err != nil {
2021-10-18 21:34:47 +08:00
log . Error ( "loadCollectionTask: occur error when release collection info from meta" , zap . Error ( err ) )
2021-06-23 17:44:12 +08:00
}
}
2021-10-11 09:54:37 +08:00
2021-10-18 21:34:47 +08:00
log . Debug ( "loadCollectionTask postExecute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , lct . getTaskID ( ) ) ,
2021-04-15 15:15:46 +08:00
zap . Int64 ( "collectionID" , collectionID ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lct * loadCollectionTask ) rollBack ( ctx context . Context ) [ ] task {
2021-10-11 09:54:37 +08:00
nodes , _ := lct . cluster . onlineNodes ( )
resultTasks := make ( [ ] task , 0 )
//TODO::call rootCoord.ReleaseDQLMessageStream
for nodeID := range nodes {
//brute force rollBack, should optimize
req := & querypb . ReleaseCollectionRequest {
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_ReleaseCollection ,
MsgID : lct . Base . MsgID ,
Timestamp : lct . Base . Timestamp ,
SourceID : lct . Base . SourceID ,
} ,
DbID : lct . DbID ,
CollectionID : lct . CollectionID ,
NodeID : nodeID ,
}
baseTask := newBaseTask ( ctx , querypb . TriggerCondition_grpcRequest )
2021-10-14 20:18:33 +08:00
baseTask . setParentTask ( lct )
2021-10-18 21:34:47 +08:00
releaseCollectionTask := & releaseCollectionTask {
baseTask : baseTask ,
2021-10-11 09:54:37 +08:00
ReleaseCollectionRequest : req ,
cluster : lct . cluster ,
}
resultTasks = append ( resultTasks , releaseCollectionTask )
}
log . Debug ( "loadCollectionTask: rollBack loadCollectionTask" , zap . Any ( "loadCollectionTask" , lct ) , zap . Any ( "rollBack task" , resultTasks ) )
return resultTasks
}
2021-10-18 21:34:47 +08:00
// releaseCollectionTask will release all the data of this collection on query nodes
type releaseCollectionTask struct {
* baseTask
2021-04-15 15:15:46 +08:00
* querypb . ReleaseCollectionRequest
2021-09-15 20:40:07 +08:00
cluster Cluster
2021-08-02 22:39:25 +08:00
meta Meta
2021-07-02 10:40:13 +08:00
rootCoord types . RootCoord
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return rct . Base
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( rct . ReleaseCollectionRequest )
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) msgType ( ) commonpb . MsgType {
2021-04-15 15:15:46 +08:00
return rct . Base . MsgType
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) timestamp ( ) Timestamp {
2021-04-15 15:15:46 +08:00
return rct . Base . Timestamp
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) preExecute ( context . Context ) error {
2021-04-15 15:15:46 +08:00
collectionID := rct . CollectionID
2021-10-14 20:18:33 +08:00
rct . setResultInfo ( nil )
2021-10-18 21:34:47 +08:00
log . Debug ( "start do releaseCollectionTask" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , rct . getTaskID ( ) ) ,
2021-04-15 15:15:46 +08:00
zap . Int64 ( "collectionID" , collectionID ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
rct . retryCount --
} ( )
2021-04-15 15:15:46 +08:00
collectionID := rct . CollectionID
2021-10-11 09:54:37 +08:00
2021-09-18 18:45:51 +08:00
// if nodeID ==0, it means that the release request has not been assigned to the specified query node
2021-06-19 11:45:09 +08:00
if rct . NodeID <= 0 {
2021-07-02 10:40:13 +08:00
rct . meta . releaseCollection ( collectionID )
releaseDQLMessageStreamReq := & proxypb . ReleaseDQLMessageStreamRequest {
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_RemoveQueryChannels ,
MsgID : rct . Base . MsgID ,
Timestamp : rct . Base . Timestamp ,
SourceID : rct . Base . SourceID ,
} ,
DbID : rct . DbID ,
CollectionID : rct . CollectionID ,
}
res , err := rct . rootCoord . ReleaseDQLMessageStream ( rct . ctx , releaseDQLMessageStreamReq )
2021-10-11 09:54:37 +08:00
if res . ErrorCode != commonpb . ErrorCode_Success || err != nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "releaseCollectionTask: release collection end, releaseDQLMessageStream occur error" , zap . Int64 ( "collectionID" , rct . CollectionID ) )
2021-07-02 10:40:13 +08:00
err = errors . New ( "rootCoord releaseDQLMessageStream failed" )
2021-10-14 20:18:33 +08:00
rct . setResultInfo ( err )
2021-07-02 10:40:13 +08:00
return err
}
2021-09-15 20:40:07 +08:00
nodes , err := rct . cluster . onlineNodes ( )
2021-06-30 17:48:19 +08:00
if err != nil {
log . Debug ( err . Error ( ) )
}
for nodeID := range nodes {
2021-06-19 11:45:09 +08:00
req := proto . Clone ( rct . ReleaseCollectionRequest ) . ( * querypb . ReleaseCollectionRequest )
req . NodeID = nodeID
2021-10-11 09:54:37 +08:00
baseTask := newBaseTask ( ctx , querypb . TriggerCondition_grpcRequest )
2021-10-14 20:18:33 +08:00
baseTask . setParentTask ( rct )
2021-10-18 21:34:47 +08:00
releaseCollectionTask := & releaseCollectionTask {
baseTask : baseTask ,
2021-06-19 11:45:09 +08:00
ReleaseCollectionRequest : req ,
2021-06-15 12:41:40 +08:00
cluster : rct . cluster ,
}
2021-10-11 09:54:37 +08:00
2021-10-14 20:18:33 +08:00
rct . addChildTask ( releaseCollectionTask )
2021-10-18 21:34:47 +08:00
log . Debug ( "releaseCollectionTask: add a releaseCollectionTask to releaseCollectionTask's childTask" , zap . Any ( "task" , releaseCollectionTask ) )
2021-06-15 12:41:40 +08:00
}
} else {
2021-08-02 22:39:25 +08:00
err := rct . cluster . releaseCollection ( ctx , rct . NodeID , rct . ReleaseCollectionRequest )
2021-04-15 15:15:46 +08:00
if err != nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "releaseCollectionTask: release collection end, node occur error" , zap . Int64 ( "nodeID" , rct . NodeID ) )
2021-10-14 20:18:33 +08:00
rct . setResultInfo ( err )
2021-06-23 17:44:12 +08:00
return err
}
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
log . Debug ( "releaseCollectionTask Execute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , rct . getTaskID ( ) ) ,
2021-06-15 12:41:40 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
2021-06-19 11:45:09 +08:00
zap . Int64 ( "nodeID" , rct . NodeID ) )
2021-04-15 15:15:46 +08:00
return nil
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) postExecute ( context . Context ) error {
2021-04-15 15:15:46 +08:00
collectionID := rct . CollectionID
2021-10-11 09:54:37 +08:00
if rct . result . ErrorCode != commonpb . ErrorCode_Success {
rct . childTasks = [ ] task { }
}
2021-04-15 15:15:46 +08:00
2021-10-18 21:34:47 +08:00
log . Debug ( "releaseCollectionTask postExecute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , rct . getTaskID ( ) ) ,
2021-06-15 12:41:40 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
2021-06-19 11:45:09 +08:00
zap . Int64 ( "nodeID" , rct . NodeID ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rct * releaseCollectionTask ) rollBack ( ctx context . Context ) [ ] task {
2021-10-11 09:54:37 +08:00
//TODO::
//if taskID == 0, recovery meta
//if taskID != 0, recovery collection on queryNode
return nil
}
2021-10-18 21:34:47 +08:00
// loadPartitionTask will load all the data of this partition to query nodes
type loadPartitionTask struct {
* baseTask
2021-04-15 15:15:46 +08:00
* querypb . LoadPartitionsRequest
2021-06-21 18:22:13 +08:00
dataCoord types . DataCoord
2021-09-15 20:40:07 +08:00
cluster Cluster
2021-08-02 22:39:25 +08:00
meta Meta
2021-06-23 17:44:12 +08:00
addCol bool
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return lpt . Base
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( lpt . LoadPartitionsRequest )
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) msgType ( ) commonpb . MsgType {
2021-04-15 15:15:46 +08:00
return lpt . Base . MsgType
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) timestamp ( ) Timestamp {
2021-04-15 15:15:46 +08:00
return lpt . Base . Timestamp
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) updateTaskProcess ( ) {
2021-04-15 15:15:46 +08:00
collectionID := lpt . CollectionID
2021-10-11 09:54:37 +08:00
partitionIDs := lpt . PartitionIDs
2021-10-14 20:18:33 +08:00
childTasks := lpt . getChildTask ( )
2021-10-11 09:54:37 +08:00
allDone := true
for _ , t := range childTasks {
2021-10-14 20:18:33 +08:00
if t . getState ( ) != taskDone {
2021-10-11 09:54:37 +08:00
allDone = false
}
}
if allDone {
for _ , id := range partitionIDs {
err := lpt . meta . setLoadPercentage ( collectionID , id , 100 , querypb . LoadType_LoadPartition )
if err != nil {
log . Error ( "loadPartitionTask: set load percentage to meta's collectionInfo" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64 ( "partitionID" , id ) )
2021-10-14 20:18:33 +08:00
lpt . setResultInfo ( err )
2021-10-11 09:54:37 +08:00
}
}
2021-06-23 17:44:12 +08:00
}
2021-10-11 09:54:37 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) preExecute ( context . Context ) error {
2021-10-11 09:54:37 +08:00
collectionID := lpt . CollectionID
2021-10-14 20:18:33 +08:00
lpt . setResultInfo ( nil )
2021-10-18 21:34:47 +08:00
log . Debug ( "start do loadPartitionTask" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , lpt . getTaskID ( ) ) ,
2021-04-15 15:15:46 +08:00
zap . Int64 ( "collectionID" , collectionID ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
lpt . retryCount --
} ( )
2021-04-15 15:15:46 +08:00
collectionID := lpt . CollectionID
partitionIDs := lpt . PartitionIDs
2021-06-23 17:44:12 +08:00
if ! lpt . meta . hasCollection ( collectionID ) {
lpt . meta . addCollection ( collectionID , lpt . Schema )
lpt . addCol = true
}
2021-06-19 11:45:09 +08:00
for _ , id := range partitionIDs {
lpt . meta . addPartition ( collectionID , id )
}
2021-04-15 15:15:46 +08:00
2021-06-15 12:41:40 +08:00
segmentsToLoad := make ( [ ] UniqueID , 0 )
2021-06-26 16:08:11 +08:00
loadSegmentReqs := make ( [ ] * querypb . LoadSegmentsRequest , 0 )
2021-06-15 12:41:40 +08:00
channelsToWatch := make ( [ ] string , 0 )
2021-06-26 16:08:11 +08:00
watchDmReqs := make ( [ ] * querypb . WatchDmChannelsRequest , 0 )
2021-04-15 15:15:46 +08:00
for _ , partitionID := range partitionIDs {
2021-06-16 11:09:56 +08:00
getRecoveryInfoRequest := & datapb . GetRecoveryInfoRequest {
2021-06-15 12:41:40 +08:00
Base : lpt . Base ,
2021-04-15 15:15:46 +08:00
CollectionID : collectionID ,
PartitionID : partitionID ,
}
2021-06-30 16:18:13 +08:00
recoveryInfo , err := lpt . dataCoord . GetRecoveryInfo ( ctx , getRecoveryInfoRequest )
2021-04-15 15:15:46 +08:00
if err != nil {
2021-10-14 20:18:33 +08:00
lpt . setResultInfo ( err )
2021-04-15 15:15:46 +08:00
return err
}
2021-06-15 12:41:40 +08:00
for _ , segmentBingLog := range recoveryInfo . Binlogs {
segmentID := segmentBingLog . SegmentID
segmentLoadInfo := & querypb . SegmentLoadInfo {
SegmentID : segmentID ,
2021-04-15 15:15:46 +08:00
PartitionID : partitionID ,
2021-06-15 12:41:40 +08:00
CollectionID : collectionID ,
2021-06-26 16:08:11 +08:00
BinlogPaths : segmentBingLog . FieldBinlogs ,
2021-09-07 11:35:18 +08:00
NumOfRows : segmentBingLog . NumOfRows ,
2021-10-22 14:31:13 +08:00
Statslogs : segmentBingLog . Statslogs ,
Deltalogs : segmentBingLog . Deltalogs ,
2021-06-26 16:08:11 +08:00
}
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lpt . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_LoadSegments
2021-06-26 16:08:11 +08:00
loadSegmentReq := & querypb . LoadSegmentsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-26 16:08:11 +08:00
Infos : [ ] * querypb . SegmentLoadInfo { segmentLoadInfo } ,
Schema : lpt . Schema ,
LoadCondition : querypb . TriggerCondition_grpcRequest ,
2021-04-15 15:15:46 +08:00
}
2021-06-15 12:41:40 +08:00
segmentsToLoad = append ( segmentsToLoad , segmentID )
2021-06-26 16:08:11 +08:00
loadSegmentReqs = append ( loadSegmentReqs , loadSegmentReq )
2021-06-15 12:41:40 +08:00
}
for _ , info := range recoveryInfo . Channels {
channel := info . ChannelName
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lpt . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_WatchDmChannels
2021-06-26 16:08:11 +08:00
watchDmRequest := & querypb . WatchDmChannelsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-15 12:41:40 +08:00
CollectionID : collectionID ,
PartitionID : partitionID ,
2021-06-16 11:09:56 +08:00
Infos : [ ] * datapb . VchannelInfo { info } ,
2021-06-15 12:41:40 +08:00
Schema : lpt . Schema ,
2021-04-15 15:15:46 +08:00
}
2021-06-15 12:41:40 +08:00
channelsToWatch = append ( channelsToWatch , channel )
2021-06-26 16:08:11 +08:00
watchDmReqs = append ( watchDmReqs , watchDmRequest )
2021-10-18 21:34:47 +08:00
log . Debug ( "loadPartitionTask: set watchDmChannelsRequests" , zap . Any ( "request" , watchDmRequest ) , zap . Int64 ( "collectionID" , collectionID ) )
2021-04-15 15:15:46 +08:00
}
}
2021-10-11 09:54:37 +08:00
err := assignInternalTask ( ctx , collectionID , lpt , lpt . meta , lpt . cluster , loadSegmentReqs , watchDmReqs , false )
2021-09-29 09:56:04 +08:00
if err != nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "loadPartitionTask: assign child task failed" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64s ( "partitionIDs" , partitionIDs ) )
2021-10-14 20:18:33 +08:00
lpt . setResultInfo ( err )
2021-09-29 09:56:04 +08:00
return err
}
2021-10-18 21:34:47 +08:00
log . Debug ( "loadPartitionTask: assign child task done" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64s ( "partitionIDs" , partitionIDs ) )
2021-04-15 15:15:46 +08:00
2021-10-18 21:34:47 +08:00
log . Debug ( "loadPartitionTask Execute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , lpt . getTaskID ( ) ) ,
2021-04-15 15:15:46 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) )
return nil
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) postExecute ( ctx context . Context ) error {
2021-04-15 15:15:46 +08:00
collectionID := lpt . CollectionID
partitionIDs := lpt . PartitionIDs
2021-06-23 17:44:12 +08:00
if lpt . result . ErrorCode != commonpb . ErrorCode_Success {
2021-10-11 09:54:37 +08:00
lpt . childTasks = [ ] task { }
2021-06-23 17:44:12 +08:00
if lpt . addCol {
2021-10-11 09:54:37 +08:00
err := lpt . meta . releaseCollection ( collectionID )
2021-06-30 17:48:19 +08:00
if err != nil {
2021-10-18 21:34:47 +08:00
log . Error ( "loadPartitionTask: occur error when release collection info from meta" , zap . Error ( err ) )
2021-06-23 17:44:12 +08:00
}
} else {
2021-10-11 09:54:37 +08:00
for _ , partitionID := range partitionIDs {
err := lpt . meta . releasePartition ( collectionID , partitionID )
if err != nil {
2021-10-18 21:34:47 +08:00
log . Error ( "loadPartitionTask: occur error when release partition info from meta" , zap . Error ( err ) )
2021-06-23 17:44:12 +08:00
}
}
}
}
2021-10-11 09:54:37 +08:00
2021-10-18 21:34:47 +08:00
log . Debug ( "loadPartitionTask postExecute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , lpt . getTaskID ( ) ) ,
2021-06-15 12:41:40 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lpt * loadPartitionTask ) rollBack ( ctx context . Context ) [ ] task {
2021-10-11 09:54:37 +08:00
partitionIDs := lpt . PartitionIDs
resultTasks := make ( [ ] task , 0 )
//brute force rollBack, should optimize
if lpt . addCol {
nodes , _ := lpt . cluster . onlineNodes ( )
for nodeID := range nodes {
req := & querypb . ReleaseCollectionRequest {
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_ReleaseCollection ,
MsgID : lpt . Base . MsgID ,
Timestamp : lpt . Base . Timestamp ,
SourceID : lpt . Base . SourceID ,
} ,
DbID : lpt . DbID ,
CollectionID : lpt . CollectionID ,
NodeID : nodeID ,
}
baseTask := newBaseTask ( ctx , querypb . TriggerCondition_grpcRequest )
2021-10-14 20:18:33 +08:00
baseTask . setParentTask ( lpt )
2021-10-18 21:34:47 +08:00
releaseCollectionTask := & releaseCollectionTask {
baseTask : baseTask ,
2021-10-11 09:54:37 +08:00
ReleaseCollectionRequest : req ,
cluster : lpt . cluster ,
}
resultTasks = append ( resultTasks , releaseCollectionTask )
}
} else {
nodes , _ := lpt . cluster . onlineNodes ( )
for nodeID := range nodes {
req := & querypb . ReleasePartitionsRequest {
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_ReleasePartitions ,
MsgID : lpt . Base . MsgID ,
Timestamp : lpt . Base . Timestamp ,
SourceID : lpt . Base . SourceID ,
} ,
DbID : lpt . DbID ,
CollectionID : lpt . CollectionID ,
PartitionIDs : partitionIDs ,
NodeID : nodeID ,
}
baseTask := newBaseTask ( ctx , querypb . TriggerCondition_grpcRequest )
2021-10-14 20:18:33 +08:00
baseTask . setParentTask ( lpt )
2021-10-18 21:34:47 +08:00
releasePartitionTask := & releasePartitionTask {
baseTask : baseTask ,
2021-10-11 09:54:37 +08:00
ReleasePartitionsRequest : req ,
cluster : lpt . cluster ,
}
resultTasks = append ( resultTasks , releasePartitionTask )
}
}
log . Debug ( "loadPartitionTask: rollBack loadPartitionTask" , zap . Any ( "loadPartitionTask" , lpt ) , zap . Any ( "rollBack task" , resultTasks ) )
return resultTasks
}
2021-10-18 21:34:47 +08:00
// releasePartitionTask will release all the data of this partition on query nodes
type releasePartitionTask struct {
* baseTask
2021-04-15 15:15:46 +08:00
* querypb . ReleasePartitionsRequest
2021-09-15 20:40:07 +08:00
cluster Cluster
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return rpt . Base
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( rpt . ReleasePartitionsRequest )
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) msgType ( ) commonpb . MsgType {
2021-04-15 15:15:46 +08:00
return rpt . Base . MsgType
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) timestamp ( ) Timestamp {
2021-04-15 15:15:46 +08:00
return rpt . Base . Timestamp
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) preExecute ( context . Context ) error {
2021-04-15 15:15:46 +08:00
collectionID := rpt . CollectionID
2021-10-14 20:18:33 +08:00
rpt . setResultInfo ( nil )
2021-04-15 15:15:46 +08:00
log . Debug ( "start do releasePartitionTask" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , rpt . getTaskID ( ) ) ,
2021-04-15 15:15:46 +08:00
zap . Int64 ( "collectionID" , collectionID ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
rpt . retryCount --
} ( )
2021-04-15 15:15:46 +08:00
collectionID := rpt . CollectionID
partitionIDs := rpt . PartitionIDs
2021-10-11 09:54:37 +08:00
2021-09-23 21:06:02 +08:00
// if nodeID ==0, it means that the release request has not been assigned to the specified query node
2021-06-19 11:45:09 +08:00
if rpt . NodeID <= 0 {
2021-09-15 20:40:07 +08:00
nodes , err := rpt . cluster . onlineNodes ( )
2021-06-30 17:48:19 +08:00
if err != nil {
log . Debug ( err . Error ( ) )
}
for nodeID := range nodes {
2021-06-19 11:45:09 +08:00
req := proto . Clone ( rpt . ReleasePartitionsRequest ) . ( * querypb . ReleasePartitionsRequest )
req . NodeID = nodeID
2021-10-11 09:54:37 +08:00
baseTask := newBaseTask ( ctx , querypb . TriggerCondition_grpcRequest )
2021-10-14 20:18:33 +08:00
baseTask . setParentTask ( rpt )
2021-10-18 21:34:47 +08:00
releasePartitionTask := & releasePartitionTask {
baseTask : baseTask ,
2021-06-19 11:45:09 +08:00
ReleasePartitionsRequest : req ,
2021-06-15 12:41:40 +08:00
cluster : rpt . cluster ,
}
2021-10-14 20:18:33 +08:00
rpt . addChildTask ( releasePartitionTask )
2021-10-18 21:34:47 +08:00
log . Debug ( "releasePartitionTask: add a releasePartitionTask to releasePartitionTask's childTask" , zap . Any ( "task" , releasePartitionTask ) )
2021-06-15 12:41:40 +08:00
}
} else {
2021-08-02 22:39:25 +08:00
err := rpt . cluster . releasePartitions ( ctx , rpt . NodeID , rpt . ReleasePartitionsRequest )
2021-04-15 15:15:46 +08:00
if err != nil {
2021-10-11 09:54:37 +08:00
log . Warn ( "ReleasePartitionsTask: release partition end, node occur error" , zap . String ( "nodeID" , fmt . Sprintln ( rpt . NodeID ) ) )
2021-10-14 20:18:33 +08:00
rpt . setResultInfo ( err )
2021-06-23 17:44:12 +08:00
return err
}
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
log . Debug ( "releasePartitionTask Execute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , rpt . getTaskID ( ) ) ,
2021-04-15 15:15:46 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
2021-06-15 12:41:40 +08:00
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
2021-06-19 11:45:09 +08:00
zap . Int64 ( "nodeID" , rpt . NodeID ) )
2021-04-15 15:15:46 +08:00
return nil
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) postExecute ( context . Context ) error {
2021-04-15 15:15:46 +08:00
collectionID := rpt . CollectionID
partitionIDs := rpt . PartitionIDs
2021-10-11 09:54:37 +08:00
if rpt . result . ErrorCode != commonpb . ErrorCode_Success {
rpt . childTasks = [ ] task { }
}
2021-06-15 12:41:40 +08:00
2021-10-18 21:34:47 +08:00
log . Debug ( "releasePartitionTask postExecute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "msgID" , rpt . getTaskID ( ) ) ,
2021-06-15 12:41:40 +08:00
zap . Int64 ( "collectionID" , collectionID ) ,
zap . Int64s ( "partitionIDs" , partitionIDs ) ,
2021-06-19 11:45:09 +08:00
zap . Int64 ( "nodeID" , rpt . NodeID ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rpt * releasePartitionTask ) rollBack ( ctx context . Context ) [ ] task {
2021-10-11 09:54:37 +08:00
//TODO::
//if taskID == 0, recovery meta
//if taskID != 0, recovery partition on queryNode
return nil
}
2021-10-18 21:34:47 +08:00
type loadSegmentTask struct {
* baseTask
2021-06-15 12:41:40 +08:00
* querypb . LoadSegmentsRequest
2021-10-11 09:54:37 +08:00
meta Meta
cluster Cluster
excludeNodeIDs [ ] int64
2021-06-15 12:41:40 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return lst . Base
}
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( lst . LoadSegmentsRequest )
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) isValid ( ) bool {
2021-10-22 19:07:15 +08:00
online , err := lst . cluster . isOnline ( lst . DstNodeID )
2021-06-30 17:48:19 +08:00
if err != nil {
return false
}
2021-10-11 09:54:37 +08:00
return lst . ctx != nil && online
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) msgType ( ) commonpb . MsgType {
2021-06-15 12:41:40 +08:00
return lst . Base . MsgType
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) timestamp ( ) Timestamp {
2021-06-15 12:41:40 +08:00
return lst . Base . Timestamp
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) updateTaskProcess ( ) {
2021-10-14 20:18:33 +08:00
parentTask := lst . getParentTask ( )
2021-10-11 09:54:37 +08:00
if parentTask == nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "loadSegmentTask: parentTask should not be nil" )
2021-10-11 09:54:37 +08:00
return
}
2021-10-14 20:18:33 +08:00
parentTask . updateTaskProcess ( )
2021-10-11 09:54:37 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) preExecute ( context . Context ) error {
2021-06-15 12:41:40 +08:00
segmentIDs := make ( [ ] UniqueID , 0 )
for _ , info := range lst . Infos {
segmentIDs = append ( segmentIDs , info . SegmentID )
}
2021-10-14 20:18:33 +08:00
lst . setResultInfo ( nil )
2021-06-15 12:41:40 +08:00
log . Debug ( "start do loadSegmentTask" ,
2021-06-19 11:45:09 +08:00
zap . Int64s ( "segmentIDs" , segmentIDs ) ,
2021-10-22 19:07:15 +08:00
zap . Int64 ( "loaded nodeID" , lst . DstNodeID ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , lst . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
lst . retryCount --
} ( )
2021-10-22 19:07:15 +08:00
err := lst . cluster . loadSegments ( ctx , lst . DstNodeID , lst . LoadSegmentsRequest )
2021-04-15 15:15:46 +08:00
if err != nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "loadSegmentTask: loadSegment occur error" , zap . Int64 ( "taskID" , lst . getTaskID ( ) ) )
2021-10-14 20:18:33 +08:00
lst . setResultInfo ( err )
2021-04-15 15:15:46 +08:00
return err
}
2021-06-15 12:41:40 +08:00
2021-06-19 11:45:09 +08:00
log . Debug ( "loadSegmentTask Execute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , lst . getTaskID ( ) ) )
2021-06-15 12:41:40 +08:00
return nil
}
2021-10-11 09:54:37 +08:00
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) postExecute ( context . Context ) error {
2021-06-19 11:45:09 +08:00
log . Debug ( "loadSegmentTask postExecute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , lst . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lst * loadSegmentTask ) reschedule ( ctx context . Context ) ( [ ] task , error ) {
2021-06-19 11:45:09 +08:00
segmentIDs := make ( [ ] UniqueID , 0 )
collectionID := lst . Infos [ 0 ] . CollectionID
reScheduledTask := make ( [ ] task , 0 )
for _ , info := range lst . Infos {
2021-07-13 14:16:00 +08:00
segmentIDs = append ( segmentIDs , info . SegmentID )
2021-06-19 11:45:09 +08:00
}
2021-10-22 19:07:15 +08:00
lst . excludeNodeIDs = append ( lst . excludeNodeIDs , lst . DstNodeID )
2021-10-11 09:54:37 +08:00
segment2Nodes , err := shuffleSegmentsToQueryNode ( segmentIDs , lst . cluster , false , lst . excludeNodeIDs )
if err != nil {
log . Error ( "loadSegment reschedule failed" , zap . Int64s ( "excludeNodes" , lst . excludeNodeIDs ) , zap . Error ( err ) )
return nil , err
}
2021-06-19 11:45:09 +08:00
node2segmentInfos := make ( map [ int64 ] [ ] * querypb . SegmentLoadInfo )
2021-07-13 14:16:00 +08:00
for index , info := range lst . Infos {
nodeID := segment2Nodes [ index ]
2021-06-19 11:45:09 +08:00
if _ , ok := node2segmentInfos [ nodeID ] ; ! ok {
node2segmentInfos [ nodeID ] = make ( [ ] * querypb . SegmentLoadInfo , 0 )
}
node2segmentInfos [ nodeID ] = append ( node2segmentInfos [ nodeID ] , info )
}
for nodeID , infos := range node2segmentInfos {
2021-10-14 20:18:33 +08:00
loadSegmentBaseTask := newBaseTask ( ctx , lst . getTriggerCondition ( ) )
loadSegmentBaseTask . setParentTask ( lst . getParentTask ( ) )
2021-10-18 21:34:47 +08:00
loadSegmentTask := & loadSegmentTask {
baseTask : loadSegmentBaseTask ,
2021-06-19 11:45:09 +08:00
LoadSegmentsRequest : & querypb . LoadSegmentsRequest {
Base : lst . Base ,
2021-10-22 19:07:15 +08:00
DstNodeID : nodeID ,
2021-06-19 11:45:09 +08:00
Infos : infos ,
Schema : lst . Schema ,
LoadCondition : lst . LoadCondition ,
} ,
2021-10-11 09:54:37 +08:00
meta : lst . meta ,
cluster : lst . cluster ,
excludeNodeIDs : lst . excludeNodeIDs ,
2021-06-19 11:45:09 +08:00
}
reScheduledTask = append ( reScheduledTask , loadSegmentTask )
2021-10-18 21:34:47 +08:00
log . Debug ( "loadSegmentTask: add a loadSegmentTask to RescheduleTasks" , zap . Any ( "task" , loadSegmentTask ) )
2021-06-19 11:45:09 +08:00
hasWatchQueryChannel := lst . cluster . hasWatchedQueryChannel ( lst . ctx , nodeID , collectionID )
if ! hasWatchQueryChannel {
2021-10-22 19:07:15 +08:00
queryChannelInfo , err := lst . meta . getQueryChannelInfoByID ( collectionID )
2021-09-29 09:56:04 +08:00
if err != nil {
return nil , err
}
2021-06-19 11:45:09 +08:00
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lst . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_WatchQueryChannels
2021-06-19 11:45:09 +08:00
addQueryChannelRequest := & querypb . AddQueryChannelRequest {
2021-10-22 19:07:15 +08:00
Base : msgBase ,
NodeID : nodeID ,
CollectionID : collectionID ,
RequestChannelID : queryChannelInfo . QueryChannelID ,
ResultChannelID : queryChannelInfo . QueryResultChannelID ,
GlobalSealedSegments : queryChannelInfo . GlobalSealedSegments ,
SeekPosition : queryChannelInfo . SeekPosition ,
2021-06-19 11:45:09 +08:00
}
2021-10-14 20:18:33 +08:00
watchQueryChannelBaseTask := newBaseTask ( ctx , lst . getTriggerCondition ( ) )
watchQueryChannelBaseTask . setParentTask ( lst . getParentTask ( ) )
2021-10-18 21:34:47 +08:00
watchQueryChannelTask := & watchQueryChannelTask {
baseTask : watchQueryChannelBaseTask ,
2021-06-19 11:45:09 +08:00
AddQueryChannelRequest : addQueryChannelRequest ,
cluster : lst . cluster ,
}
reScheduledTask = append ( reScheduledTask , watchQueryChannelTask )
2021-10-18 21:34:47 +08:00
log . Debug ( "loadSegmentTask: add a watchQueryChannelTask to RescheduleTasks" , zap . Any ( "task" , watchQueryChannelTask ) )
2021-06-19 11:45:09 +08:00
}
}
return reScheduledTask , nil
}
2021-10-18 21:34:47 +08:00
type releaseSegmentTask struct {
* baseTask
2021-06-15 12:41:40 +08:00
* querypb . ReleaseSegmentsRequest
2021-09-15 20:40:07 +08:00
cluster Cluster
2021-06-15 12:41:40 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return rst . Base
}
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( rst . ReleaseSegmentsRequest )
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) isValid ( ) bool {
2021-10-11 09:54:37 +08:00
online , err := rst . cluster . isOnline ( rst . NodeID )
2021-06-30 17:48:19 +08:00
if err != nil {
return false
}
2021-10-11 09:54:37 +08:00
return rst . ctx != nil && online
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) msgType ( ) commonpb . MsgType {
2021-06-15 12:41:40 +08:00
return rst . Base . MsgType
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) timestamp ( ) Timestamp {
2021-06-15 12:41:40 +08:00
return rst . Base . Timestamp
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) preExecute ( context . Context ) error {
2021-06-15 12:41:40 +08:00
segmentIDs := rst . SegmentIDs
2021-10-14 20:18:33 +08:00
rst . setResultInfo ( nil )
2021-06-15 12:41:40 +08:00
log . Debug ( "start do releaseSegmentTask" ,
2021-06-19 11:45:09 +08:00
zap . Int64s ( "segmentIDs" , segmentIDs ) ,
zap . Int64 ( "loaded nodeID" , rst . NodeID ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , rst . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
rst . retryCount --
} ( )
2021-08-02 22:39:25 +08:00
err := rst . cluster . releaseSegments ( rst . ctx , rst . NodeID , rst . ReleaseSegmentsRequest )
2021-06-15 12:41:40 +08:00
if err != nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "releaseSegmentTask: releaseSegment occur error" , zap . Int64 ( "taskID" , rst . getTaskID ( ) ) )
2021-10-14 20:18:33 +08:00
rst . setResultInfo ( err )
2021-06-15 12:41:40 +08:00
return err
}
log . Debug ( "releaseSegmentTask Execute done" ,
2021-06-19 11:45:09 +08:00
zap . Int64s ( "segmentIDs" , rst . SegmentIDs ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , rst . getTaskID ( ) ) )
2021-06-15 12:41:40 +08:00
return nil
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( rst * releaseSegmentTask ) postExecute ( context . Context ) error {
2021-06-15 12:41:40 +08:00
segmentIDs := rst . SegmentIDs
log . Debug ( "releaseSegmentTask postExecute done" ,
2021-06-19 11:45:09 +08:00
zap . Int64s ( "segmentIDs" , segmentIDs ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , rst . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-10-18 21:34:47 +08:00
type watchDmChannelTask struct {
* baseTask
2021-06-15 12:41:40 +08:00
* querypb . WatchDmChannelsRequest
2021-10-11 09:54:37 +08:00
meta Meta
cluster Cluster
excludeNodeIDs [ ] int64
2021-06-15 12:41:40 +08:00
}
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return wdt . Base
}
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( wdt . WatchDmChannelsRequest )
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) isValid ( ) bool {
2021-10-11 09:54:37 +08:00
online , err := wdt . cluster . isOnline ( wdt . NodeID )
2021-06-30 17:48:19 +08:00
if err != nil {
return false
}
2021-10-11 09:54:37 +08:00
return wdt . ctx != nil && online
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) msgType ( ) commonpb . MsgType {
2021-06-15 12:41:40 +08:00
return wdt . Base . MsgType
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) timestamp ( ) Timestamp {
2021-06-15 12:41:40 +08:00
return wdt . Base . Timestamp
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) updateTaskProcess ( ) {
2021-10-14 20:18:33 +08:00
parentTask := wdt . getParentTask ( )
2021-10-11 09:54:37 +08:00
if parentTask == nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "watchDmChannelTask: parentTask should not be nil" )
2021-10-11 09:54:37 +08:00
return
}
2021-10-14 20:18:33 +08:00
parentTask . updateTaskProcess ( )
2021-10-11 09:54:37 +08:00
}
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) preExecute ( context . Context ) error {
2021-06-15 12:41:40 +08:00
channelInfos := wdt . Infos
channels := make ( [ ] string , 0 )
for _ , info := range channelInfos {
channels = append ( channels , info . ChannelName )
2021-04-15 15:15:46 +08:00
}
2021-10-14 20:18:33 +08:00
wdt . setResultInfo ( nil )
2021-06-15 12:41:40 +08:00
log . Debug ( "start do watchDmChannelTask" ,
2021-06-19 11:45:09 +08:00
zap . Strings ( "dmChannels" , channels ) ,
zap . Int64 ( "loaded nodeID" , wdt . NodeID ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , wdt . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
wdt . retryCount --
} ( )
2021-08-02 22:39:25 +08:00
err := wdt . cluster . watchDmChannels ( wdt . ctx , wdt . NodeID , wdt . WatchDmChannelsRequest )
2021-04-15 15:15:46 +08:00
if err != nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "watchDmChannelTask: watchDmChannel occur error" , zap . Int64 ( "taskID" , wdt . getTaskID ( ) ) )
2021-10-14 20:18:33 +08:00
wdt . setResultInfo ( err )
2021-04-15 15:15:46 +08:00
return err
}
2021-06-15 12:41:40 +08:00
2021-06-19 11:45:09 +08:00
log . Debug ( "watchDmChannelsTask Execute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , wdt . getTaskID ( ) ) )
2021-06-15 12:41:40 +08:00
return nil
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) postExecute ( context . Context ) error {
2021-06-19 11:45:09 +08:00
log . Debug ( "watchDmChannelTask postExecute done" ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , wdt . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-10-18 21:34:47 +08:00
func ( wdt * watchDmChannelTask ) reschedule ( ctx context . Context ) ( [ ] task , error ) {
2021-06-19 11:45:09 +08:00
collectionID := wdt . CollectionID
channelIDs := make ( [ ] string , 0 )
reScheduledTask := make ( [ ] task , 0 )
for _ , info := range wdt . Infos {
2021-07-13 14:16:00 +08:00
channelIDs = append ( channelIDs , info . ChannelName )
2021-06-19 11:45:09 +08:00
}
2021-10-11 09:54:37 +08:00
wdt . excludeNodeIDs = append ( wdt . excludeNodeIDs , wdt . NodeID )
channel2Nodes , err := shuffleChannelsToQueryNode ( channelIDs , wdt . cluster , false , wdt . excludeNodeIDs )
if err != nil {
log . Error ( "watchDmChannel reschedule failed" , zap . Int64s ( "excludeNodes" , wdt . excludeNodeIDs ) , zap . Error ( err ) )
return nil , err
}
2021-06-19 11:45:09 +08:00
node2channelInfos := make ( map [ int64 ] [ ] * datapb . VchannelInfo )
for index , info := range wdt . Infos {
nodeID := channel2Nodes [ index ]
if _ , ok := node2channelInfos [ nodeID ] ; ! ok {
node2channelInfos [ nodeID ] = make ( [ ] * datapb . VchannelInfo , 0 )
}
node2channelInfos [ nodeID ] = append ( node2channelInfos [ nodeID ] , info )
}
for nodeID , infos := range node2channelInfos {
2021-10-14 20:18:33 +08:00
watchDmChannelBaseTask := newBaseTask ( ctx , wdt . getTriggerCondition ( ) )
watchDmChannelBaseTask . setParentTask ( wdt . getParentTask ( ) )
2021-10-18 21:34:47 +08:00
watchDmChannelTask := & watchDmChannelTask {
baseTask : watchDmChannelBaseTask ,
2021-06-19 11:45:09 +08:00
WatchDmChannelsRequest : & querypb . WatchDmChannelsRequest {
Base : wdt . Base ,
NodeID : nodeID ,
CollectionID : wdt . CollectionID ,
PartitionID : wdt . PartitionID ,
Infos : infos ,
Schema : wdt . Schema ,
ExcludeInfos : wdt . ExcludeInfos ,
} ,
2021-10-11 09:54:37 +08:00
meta : wdt . meta ,
cluster : wdt . cluster ,
excludeNodeIDs : wdt . excludeNodeIDs ,
2021-06-19 11:45:09 +08:00
}
2021-10-11 09:54:37 +08:00
reScheduledTask = append ( reScheduledTask , watchDmChannelTask )
2021-10-18 21:34:47 +08:00
log . Debug ( "watchDmChannelTask: add a watchDmChannelTask to RescheduleTasks" , zap . Any ( "task" , watchDmChannelTask ) )
2021-06-19 11:45:09 +08:00
hasWatchQueryChannel := wdt . cluster . hasWatchedQueryChannel ( wdt . ctx , nodeID , collectionID )
if ! hasWatchQueryChannel {
2021-10-22 19:07:15 +08:00
queryChannelInfo , err := wdt . meta . getQueryChannelInfoByID ( collectionID )
2021-09-29 09:56:04 +08:00
if err != nil {
return nil , err
}
2021-06-19 11:45:09 +08:00
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( wdt . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_WatchQueryChannels
2021-06-19 11:45:09 +08:00
addQueryChannelRequest := & querypb . AddQueryChannelRequest {
2021-10-22 19:07:15 +08:00
Base : msgBase ,
NodeID : nodeID ,
CollectionID : collectionID ,
RequestChannelID : queryChannelInfo . QueryChannelID ,
ResultChannelID : queryChannelInfo . QueryResultChannelID ,
GlobalSealedSegments : queryChannelInfo . GlobalSealedSegments ,
SeekPosition : queryChannelInfo . SeekPosition ,
2021-06-19 11:45:09 +08:00
}
2021-10-14 20:18:33 +08:00
watchQueryChannelBaseTask := newBaseTask ( ctx , wdt . getTriggerCondition ( ) )
watchQueryChannelBaseTask . setParentTask ( wdt . getParentTask ( ) )
2021-10-18 21:34:47 +08:00
watchQueryChannelTask := & watchQueryChannelTask {
baseTask : watchQueryChannelBaseTask ,
2021-06-19 11:45:09 +08:00
AddQueryChannelRequest : addQueryChannelRequest ,
cluster : wdt . cluster ,
}
reScheduledTask = append ( reScheduledTask , watchQueryChannelTask )
2021-10-18 21:34:47 +08:00
log . Debug ( "watchDmChannelTask: add a watchQueryChannelTask to RescheduleTasks" , zap . Any ( "task" , watchQueryChannelTask ) )
2021-06-19 11:45:09 +08:00
}
}
return reScheduledTask , nil
}
2021-10-18 21:34:47 +08:00
type watchQueryChannelTask struct {
* baseTask
2021-06-15 12:41:40 +08:00
* querypb . AddQueryChannelRequest
2021-09-15 20:40:07 +08:00
cluster Cluster
2021-06-15 12:41:40 +08:00
}
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return wqt . Base
}
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( wqt . AddQueryChannelRequest )
2021-06-15 12:41:40 +08:00
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) isValid ( ) bool {
2021-10-11 09:54:37 +08:00
online , err := wqt . cluster . isOnline ( wqt . NodeID )
2021-06-30 17:48:19 +08:00
if err != nil {
return false
}
2021-10-11 09:54:37 +08:00
return wqt . ctx != nil && online
2021-06-15 12:41:40 +08:00
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) msgType ( ) commonpb . MsgType {
2021-06-19 11:45:09 +08:00
return wqt . Base . MsgType
}
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) timestamp ( ) Timestamp {
2021-06-19 11:45:09 +08:00
return wqt . Base . Timestamp
}
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) updateTaskProcess ( ) {
2021-10-14 20:18:33 +08:00
parentTask := wqt . getParentTask ( )
2021-10-11 09:54:37 +08:00
if parentTask == nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "watchQueryChannelTask: parentTask should not be nil" )
2021-10-11 09:54:37 +08:00
return
2021-08-02 22:39:25 +08:00
}
2021-10-14 20:18:33 +08:00
parentTask . updateTaskProcess ( )
2021-10-11 09:54:37 +08:00
}
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) preExecute ( context . Context ) error {
2021-10-14 20:18:33 +08:00
wqt . setResultInfo ( nil )
2021-10-18 21:34:47 +08:00
log . Debug ( "start do watchQueryChannelTask" ,
2021-06-19 11:45:09 +08:00
zap . Int64 ( "collectionID" , wqt . CollectionID ) ,
zap . String ( "queryChannel" , wqt . RequestChannelID ) ,
zap . String ( "queryResultChannel" , wqt . ResultChannelID ) ,
zap . Int64 ( "loaded nodeID" , wqt . NodeID ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , wqt . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-06-19 11:45:09 +08:00
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
wqt . retryCount --
} ( )
2021-08-02 22:39:25 +08:00
err := wqt . cluster . addQueryChannel ( wqt . ctx , wqt . NodeID , wqt . AddQueryChannelRequest )
2021-06-15 12:41:40 +08:00
if err != nil {
2021-10-24 15:16:00 +08:00
log . Warn ( "watchQueryChannelTask: watchQueryChannel occur error" , zap . Int64 ( "taskID" , wqt . getTaskID ( ) ) , zap . Error ( err ) )
2021-10-14 20:18:33 +08:00
wqt . setResultInfo ( err )
2021-04-15 15:15:46 +08:00
return err
}
2021-06-15 12:41:40 +08:00
log . Debug ( "watchQueryChannelTask Execute done" ,
2021-06-19 11:45:09 +08:00
zap . Int64 ( "collectionID" , wqt . CollectionID ) ,
zap . String ( "queryChannel" , wqt . RequestChannelID ) ,
zap . String ( "queryResultChannel" , wqt . ResultChannelID ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , wqt . getTaskID ( ) ) )
2021-06-15 12:41:40 +08:00
return nil
}
2021-06-16 11:09:56 +08:00
2021-10-18 21:34:47 +08:00
func ( wqt * watchQueryChannelTask ) postExecute ( context . Context ) error {
log . Debug ( "watchQueryChannelTask postExecute done" ,
2021-06-19 11:45:09 +08:00
zap . Int64 ( "collectionID" , wqt . CollectionID ) ,
zap . String ( "queryChannel" , wqt . RequestChannelID ) ,
zap . String ( "queryResultChannel" , wqt . ResultChannelID ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , wqt . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-15 12:41:40 +08:00
}
2021-10-24 22:39:09 +08:00
//****************************handoff task********************************//
2021-10-18 21:34:47 +08:00
type handoffTask struct {
2021-10-24 22:39:09 +08:00
* baseTask
* querypb . HandoffSegmentsRequest
dataCoord types . DataCoord
cluster Cluster
meta Meta
}
// msgBase returns the MsgBase of the embedded HandoffSegmentsRequest.
func (ht *handoffTask) msgBase() *commonpb.MsgBase {
	return ht.HandoffSegmentsRequest.Base
}
// marshal encodes the embedded HandoffSegmentsRequest with proto.Marshal.
func (ht *handoffTask) marshal() ([]byte, error) {
	req := ht.HandoffSegmentsRequest
	return proto.Marshal(req)
}
// msgType returns the MsgType stored in the request's MsgBase.
func (ht *handoffTask) msgType() commonpb.MsgType {
	return ht.HandoffSegmentsRequest.Base.MsgType
}
// timestamp returns the Timestamp stored in the request's MsgBase.
func (ht *handoffTask) timestamp() Timestamp {
	return ht.HandoffSegmentsRequest.Base.Timestamp
}
// preExecute resets the task result via setResultInfo(nil) and logs the IDs
// of the segments in the handoff request. It always returns nil.
func (ht *handoffTask) preExecute(context.Context) error {
	ht.setResultInfo(nil)

	infos := ht.HandoffSegmentsRequest.SegmentInfos
	segmentIDs := make([]UniqueID, len(infos))
	for i, info := range infos {
		segmentIDs[i] = info.SegmentID
	}

	log.Debug("start do handoff segments task",
		zap.Int64s("segmentIDs", segmentIDs))
	return nil
}
func ( ht * handoffTask ) execute ( ctx context . Context ) error {
segmentInfos := ht . HandoffSegmentsRequest . SegmentInfos
for _ , segmentInfo := range segmentInfos {
collectionID := segmentInfo . CollectionID
partitionID := segmentInfo . PartitionID
segmentID := segmentInfo . SegmentID
collectionInfo , err := ht . meta . getCollectionInfoByID ( collectionID )
if err != nil {
log . Debug ( "handoffTask: collection has not been loaded into memory" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64 ( "segmentID" , segmentID ) )
continue
}
2021-10-26 13:04:18 +08:00
if collectionInfo . LoadType == querypb . LoadType_loadCollection && ht . meta . hasReleasePartition ( collectionID , partitionID ) {
log . Debug ( "handoffTask: partition has not been released" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64 ( "partitionID" , partitionID ) )
continue
}
2021-10-24 22:39:09 +08:00
partitionLoaded := false
for _ , id := range collectionInfo . PartitionIDs {
if id == partitionID {
partitionLoaded = true
}
}
if collectionInfo . LoadType != querypb . LoadType_loadCollection && ! partitionLoaded {
log . Debug ( "handoffTask: partition has not been loaded into memory" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64 ( "partitionID" , partitionID ) , zap . Int64 ( "segmentID" , segmentID ) )
continue
}
_ , err = ht . meta . getSegmentInfoByID ( segmentID )
if err != nil {
getRecoveryInfoRequest := & datapb . GetRecoveryInfoRequest {
Base : ht . Base ,
CollectionID : collectionID ,
PartitionID : partitionID ,
}
recoveryInfo , err := ht . dataCoord . GetRecoveryInfo ( ctx , getRecoveryInfoRequest )
if err != nil {
ht . setResultInfo ( err )
return err
}
findBinlog := false
var loadSegmentReq * querypb . LoadSegmentsRequest
for _ , segmentBinlogs := range recoveryInfo . Binlogs {
if segmentBinlogs . SegmentID == segmentID {
findBinlog = true
segmentLoadInfo := & querypb . SegmentLoadInfo {
SegmentID : segmentID ,
PartitionID : partitionID ,
CollectionID : collectionID ,
BinlogPaths : segmentBinlogs . FieldBinlogs ,
NumOfRows : segmentBinlogs . NumOfRows ,
}
msgBase := proto . Clone ( ht . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_LoadSegments
loadSegmentReq = & querypb . LoadSegmentsRequest {
Base : msgBase ,
Infos : [ ] * querypb . SegmentLoadInfo { segmentLoadInfo } ,
Schema : collectionInfo . Schema ,
LoadCondition : querypb . TriggerCondition_handoff ,
}
}
}
if ! findBinlog {
err = fmt . Errorf ( "segmnet has not been flushed, segmentID is %d" , segmentID )
ht . setResultInfo ( err )
return err
}
err = assignInternalTask ( ctx , collectionID , ht , ht . meta , ht . cluster , [ ] * querypb . LoadSegmentsRequest { loadSegmentReq } , nil , true )
if err != nil {
log . Error ( "handoffTask: assign child task failed" , zap . Any ( "segmentInfo" , segmentInfo ) )
ht . setResultInfo ( err )
return err
}
} else {
err = fmt . Errorf ( "sealed segment has been exist on query node, segmentID is %d" , segmentID )
log . Error ( "handoffTask: sealed segment has been exist on query node" , zap . Int64 ( "segmentID" , segmentID ) )
ht . setResultInfo ( err )
return err
}
}
log . Debug ( "handoffTask: assign child task done" , zap . Any ( "segmentInfos" , segmentInfos ) )
log . Debug ( "handoffTask Execute done" ,
zap . Int64 ( "taskID" , ht . getTaskID ( ) ) )
return nil
}
// postExecute discards the child tasks collected so far when the task result
// is not a success, then logs that the handoff task has finished. It always
// returns nil.
func (ht *handoffTask) postExecute(_ context.Context) error {
	failed := ht.result.ErrorCode != commonpb.ErrorCode_Success
	if failed {
		ht.childTasks = make([]task, 0)
	}

	taskID := ht.getTaskID()
	log.Debug("handoffTask postExecute done", zap.Int64("taskID", taskID))
	return nil
}
func ( ht * handoffTask ) rollBack ( ctx context . Context ) [ ] task {
resultTasks := make ( [ ] task , 0 )
childTasks := ht . getChildTask ( )
for _ , childTask := range childTasks {
if childTask . msgType ( ) == commonpb . MsgType_LoadSegments {
// TODO:: add release segment to rollBack, no release does not affect correctness of query
}
}
return resultTasks
2021-06-19 11:45:09 +08:00
}
2021-06-15 12:41:40 +08:00
2021-10-18 21:34:47 +08:00
type loadBalanceTask struct {
* baseTask
2021-06-19 11:45:09 +08:00
* querypb . LoadBalanceRequest
2021-06-21 18:22:13 +08:00
rootCoord types . RootCoord
dataCoord types . DataCoord
2021-09-15 20:40:07 +08:00
cluster Cluster
2021-08-02 22:39:25 +08:00
meta Meta
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lbt * loadBalanceTask ) msgBase ( ) * commonpb . MsgBase {
2021-06-26 16:08:11 +08:00
return lbt . Base
}
2021-10-18 21:34:47 +08:00
func ( lbt * loadBalanceTask ) marshal ( ) ( [ ] byte , error ) {
2021-08-03 22:03:25 +08:00
return proto . Marshal ( lbt . LoadBalanceRequest )
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lbt * loadBalanceTask ) msgType ( ) commonpb . MsgType {
2021-06-19 11:45:09 +08:00
return lbt . Base . MsgType
}
2021-10-18 21:34:47 +08:00
func ( lbt * loadBalanceTask ) timestamp ( ) Timestamp {
2021-06-19 11:45:09 +08:00
return lbt . Base . Timestamp
}
2021-10-18 21:34:47 +08:00
func ( lbt * loadBalanceTask ) preExecute ( context . Context ) error {
2021-10-14 20:18:33 +08:00
lbt . setResultInfo ( nil )
2021-10-18 21:34:47 +08:00
log . Debug ( "start do loadBalanceTask" ,
2021-06-19 11:45:09 +08:00
zap . Int64s ( "sourceNodeIDs" , lbt . SourceNodeIDs ) ,
zap . Any ( "balanceReason" , lbt . BalanceReason ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , lbt . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-06-19 11:45:09 +08:00
}
2021-10-18 21:34:47 +08:00
func ( lbt * loadBalanceTask ) execute ( ctx context . Context ) error {
2021-10-11 09:54:37 +08:00
defer func ( ) {
lbt . retryCount --
} ( )
2021-06-19 11:45:09 +08:00
if lbt . triggerCondition == querypb . TriggerCondition_nodeDown {
for _ , nodeID := range lbt . SourceNodeIDs {
2021-08-02 22:39:25 +08:00
collectionInfos := lbt . cluster . getCollectionInfosByID ( lbt . ctx , nodeID )
for _ , info := range collectionInfos {
collectionID := info . CollectionID
2021-06-30 17:48:19 +08:00
metaInfo , err := lbt . meta . getCollectionInfoByID ( collectionID )
if err != nil {
2021-10-18 21:34:47 +08:00
log . Warn ( "loadBalanceTask: getCollectionInfoByID occur error" , zap . String ( "error" , err . Error ( ) ) )
2021-10-14 20:18:33 +08:00
lbt . setResultInfo ( err )
2021-10-11 09:54:37 +08:00
return err
2021-06-30 17:48:19 +08:00
}
2021-08-02 22:39:25 +08:00
loadType := metaInfo . LoadType
2021-06-30 17:48:19 +08:00
schema := metaInfo . Schema
2021-06-19 11:45:09 +08:00
partitionIDs := info . PartitionIDs
segmentsToLoad := make ( [ ] UniqueID , 0 )
2021-06-30 17:48:19 +08:00
loadSegmentReqs := make ( [ ] * querypb . LoadSegmentsRequest , 0 )
2021-06-19 11:45:09 +08:00
channelsToWatch := make ( [ ] string , 0 )
2021-06-30 17:48:19 +08:00
watchDmChannelReqs := make ( [ ] * querypb . WatchDmChannelsRequest , 0 )
2021-06-19 11:45:09 +08:00
dmChannels , err := lbt . meta . getDmChannelsByNodeID ( collectionID , nodeID )
if err != nil {
2021-10-14 20:18:33 +08:00
lbt . setResultInfo ( err )
2021-06-19 11:45:09 +08:00
return err
2021-06-15 12:41:40 +08:00
}
2021-06-19 11:45:09 +08:00
for _ , partitionID := range partitionIDs {
getRecoveryInfo := & datapb . GetRecoveryInfoRequest {
Base : & commonpb . MsgBase {
MsgType : commonpb . MsgType_LoadBalanceSegments ,
} ,
CollectionID : collectionID ,
PartitionID : partitionID ,
}
2021-07-01 15:24:17 +08:00
recoveryInfo , err := lbt . dataCoord . GetRecoveryInfo ( ctx , getRecoveryInfo )
2021-06-19 11:45:09 +08:00
if err != nil {
2021-10-14 20:18:33 +08:00
lbt . setResultInfo ( err )
2021-06-19 11:45:09 +08:00
return err
}
2021-06-30 17:48:19 +08:00
for _ , segmentBingLog := range recoveryInfo . Binlogs {
segmentID := segmentBingLog . SegmentID
segmentLoadInfo := & querypb . SegmentLoadInfo {
SegmentID : segmentID ,
PartitionID : partitionID ,
CollectionID : collectionID ,
BinlogPaths : segmentBingLog . FieldBinlogs ,
2021-09-07 11:35:18 +08:00
NumOfRows : segmentBingLog . NumOfRows ,
2021-10-22 14:31:13 +08:00
Statslogs : segmentBingLog . Statslogs ,
Deltalogs : segmentBingLog . Deltalogs ,
2021-06-30 17:48:19 +08:00
}
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lbt . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_LoadSegments
2021-06-30 17:48:19 +08:00
loadSegmentReq := & querypb . LoadSegmentsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-30 17:48:19 +08:00
Infos : [ ] * querypb . SegmentLoadInfo { segmentLoadInfo } ,
Schema : schema ,
LoadCondition : querypb . TriggerCondition_nodeDown ,
2021-10-22 19:07:15 +08:00
SourceNodeID : nodeID ,
2021-06-30 17:48:19 +08:00
}
segmentsToLoad = append ( segmentsToLoad , segmentID )
loadSegmentReqs = append ( loadSegmentReqs , loadSegmentReq )
}
2021-06-19 11:45:09 +08:00
for _ , channelInfo := range recoveryInfo . Channels {
for _ , channel := range dmChannels {
if channelInfo . ChannelName == channel {
2021-08-02 22:39:25 +08:00
if loadType == querypb . LoadType_loadCollection {
2021-06-30 17:48:19 +08:00
merged := false
for index , channelName := range channelsToWatch {
if channel == channelName {
merged = true
oldInfo := watchDmChannelReqs [ index ] . Infos [ 0 ]
newInfo := mergeVChannelInfo ( oldInfo , channelInfo )
watchDmChannelReqs [ index ] . Infos = [ ] * datapb . VchannelInfo { newInfo }
break
}
}
if ! merged {
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lbt . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_WatchDmChannels
2021-06-30 17:48:19 +08:00
watchRequest := & querypb . WatchDmChannelsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-30 17:48:19 +08:00
CollectionID : collectionID ,
Infos : [ ] * datapb . VchannelInfo { channelInfo } ,
Schema : schema ,
}
2021-06-19 11:45:09 +08:00
channelsToWatch = append ( channelsToWatch , channel )
2021-06-30 17:48:19 +08:00
watchDmChannelReqs = append ( watchDmChannelReqs , watchRequest )
2021-06-19 11:45:09 +08:00
}
} else {
2021-07-13 14:16:00 +08:00
msgBase := proto . Clone ( lbt . Base ) . ( * commonpb . MsgBase )
msgBase . MsgType = commonpb . MsgType_WatchDmChannels
2021-06-30 17:48:19 +08:00
watchRequest := & querypb . WatchDmChannelsRequest {
2021-07-13 14:16:00 +08:00
Base : msgBase ,
2021-06-30 17:48:19 +08:00
CollectionID : collectionID ,
PartitionID : partitionID ,
Infos : [ ] * datapb . VchannelInfo { channelInfo } ,
Schema : schema ,
}
2021-06-19 11:45:09 +08:00
channelsToWatch = append ( channelsToWatch , channel )
2021-06-30 17:48:19 +08:00
watchDmChannelReqs = append ( watchDmChannelReqs , watchRequest )
2021-06-19 11:45:09 +08:00
}
break
}
}
}
2021-06-15 12:41:40 +08:00
}
2021-10-11 09:54:37 +08:00
err = assignInternalTask ( ctx , collectionID , lbt , lbt . meta , lbt . cluster , loadSegmentReqs , watchDmChannelReqs , true )
2021-09-29 09:56:04 +08:00
if err != nil {
2021-10-11 09:54:37 +08:00
log . Warn ( "loadBalanceTask: assign child task failed" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64s ( "partitionIDs" , partitionIDs ) )
2021-10-14 20:18:33 +08:00
lbt . setResultInfo ( err )
2021-09-29 09:56:04 +08:00
return err
}
2021-06-30 17:48:19 +08:00
log . Debug ( "loadBalanceTask: assign child task done" , zap . Int64 ( "collectionID" , collectionID ) , zap . Int64s ( "partitionIDs" , partitionIDs ) )
2021-06-15 12:41:40 +08:00
}
2021-04-15 15:15:46 +08:00
}
}
2021-06-19 11:45:09 +08:00
//TODO::
//if lbt.triggerCondition == querypb.TriggerCondition_loadBalance {
// return nil
//}
2021-10-18 21:34:47 +08:00
log . Debug ( "loadBalanceTask Execute done" ,
2021-06-19 11:45:09 +08:00
zap . Int64s ( "sourceNodeIDs" , lbt . SourceNodeIDs ) ,
zap . Any ( "balanceReason" , lbt . BalanceReason ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , lbt . getTaskID ( ) ) )
2021-06-19 11:45:09 +08:00
return nil
}
2021-10-18 21:34:47 +08:00
func ( lbt * loadBalanceTask ) postExecute ( context . Context ) error {
2021-10-11 09:54:37 +08:00
if lbt . result . ErrorCode == commonpb . ErrorCode_Success {
for _ , id := range lbt . SourceNodeIDs {
err := lbt . cluster . removeNodeInfo ( id )
if err != nil {
2021-10-18 21:34:47 +08:00
log . Error ( "loadBalanceTask: occur error when removing node info from cluster" , zap . Int64 ( "nodeID" , id ) )
2021-10-11 09:54:37 +08:00
}
2021-06-22 14:10:09 +08:00
}
2021-10-11 09:54:37 +08:00
} else {
lbt . childTasks = [ ] task { }
2021-06-22 14:10:09 +08:00
}
2021-10-11 09:54:37 +08:00
2021-10-18 21:34:47 +08:00
log . Debug ( "loadBalanceTask postExecute done" ,
2021-06-19 11:45:09 +08:00
zap . Int64s ( "sourceNodeIDs" , lbt . SourceNodeIDs ) ,
zap . Any ( "balanceReason" , lbt . BalanceReason ) ,
2021-10-14 20:18:33 +08:00
zap . Int64 ( "taskID" , lbt . getTaskID ( ) ) )
2021-06-30 16:18:13 +08:00
return nil
2021-04-15 15:15:46 +08:00
}
2021-10-11 09:54:37 +08:00
func shuffleChannelsToQueryNode ( dmChannels [ ] string , cluster Cluster , wait bool , excludeNodeIDs [ ] int64 ) ( [ ] int64 , error ) {
2021-04-15 15:15:46 +08:00
maxNumChannels := 0
2021-08-02 22:39:25 +08:00
nodes := make ( map [ int64 ] Node )
2021-06-30 17:48:19 +08:00
var err error
for {
2021-09-15 20:40:07 +08:00
nodes , err = cluster . onlineNodes ( )
2021-06-30 17:48:19 +08:00
if err != nil {
log . Debug ( err . Error ( ) )
2021-10-11 09:54:37 +08:00
if ! wait {
return nil , err
}
2021-06-30 17:48:19 +08:00
time . Sleep ( 1 * time . Second )
2021-06-19 11:45:09 +08:00
continue
}
2021-10-11 09:54:37 +08:00
for _ , id := range excludeNodeIDs {
delete ( nodes , id )
}
if len ( nodes ) > 0 {
break
}
if ! wait {
return nil , errors . New ( "no queryNode to allocate" )
}
2021-06-30 17:48:19 +08:00
}
for nodeID := range nodes {
2021-06-15 12:41:40 +08:00
numChannels , _ := cluster . getNumDmChannels ( nodeID )
2021-04-15 15:15:46 +08:00
if numChannels > maxNumChannels {
maxNumChannels = numChannels
}
}
2021-06-19 11:45:09 +08:00
res := make ( [ ] int64 , 0 )
2021-06-15 12:41:40 +08:00
if len ( dmChannels ) == 0 {
2021-10-11 09:54:37 +08:00
return res , nil
2021-06-15 12:41:40 +08:00
}
2021-04-15 15:15:46 +08:00
offset := 0
loopAll := false
for {
lastOffset := offset
if ! loopAll {
2021-06-30 17:48:19 +08:00
for nodeID := range nodes {
numSegments , _ := cluster . getNumSegments ( nodeID )
2021-06-15 12:41:40 +08:00
if numSegments >= maxNumChannels {
2021-04-15 15:15:46 +08:00
continue
}
2021-06-30 17:48:19 +08:00
res = append ( res , nodeID )
2021-04-15 15:15:46 +08:00
offset ++
if offset == len ( dmChannels ) {
2021-10-11 09:54:37 +08:00
return res , nil
2021-04-15 15:15:46 +08:00
}
}
} else {
2021-06-30 17:48:19 +08:00
for nodeID := range nodes {
res = append ( res , nodeID )
2021-04-15 15:15:46 +08:00
offset ++
if offset == len ( dmChannels ) {
2021-10-11 09:54:37 +08:00
return res , nil
2021-04-15 15:15:46 +08:00
}
}
}
if lastOffset == offset {
loopAll = true
}
}
}
2021-10-21 11:08:37 +08:00
// shuffleSegmentsToQueryNode shuffle segments to online nodes
// returned are noded id for each segment, which satisfies:
// len(returnedNodeIds) == len(segmentIDs) && segmentIDs[i] is assigned to returnedNodeIds[i]
2021-10-11 09:54:37 +08:00
func shuffleSegmentsToQueryNode ( segmentIDs [ ] UniqueID , cluster Cluster , wait bool , excludeNodeIDs [ ] int64 ) ( [ ] int64 , error ) {
2021-04-15 15:15:46 +08:00
maxNumSegments := 0
2021-08-02 22:39:25 +08:00
nodes := make ( map [ int64 ] Node )
2021-06-30 17:48:19 +08:00
var err error
for {
2021-09-15 20:40:07 +08:00
nodes , err = cluster . onlineNodes ( )
2021-06-30 17:48:19 +08:00
if err != nil {
log . Debug ( err . Error ( ) )
2021-10-11 09:54:37 +08:00
if ! wait {
return nil , err
}
2021-06-30 17:48:19 +08:00
time . Sleep ( 1 * time . Second )
2021-06-19 11:45:09 +08:00
continue
}
2021-10-11 09:54:37 +08:00
for _ , id := range excludeNodeIDs {
delete ( nodes , id )
}
if len ( nodes ) > 0 {
break
}
if ! wait {
return nil , errors . New ( "no queryNode to allocate" )
}
2021-06-30 17:48:19 +08:00
}
for nodeID := range nodes {
2021-06-15 12:41:40 +08:00
numSegments , _ := cluster . getNumSegments ( nodeID )
2021-04-15 15:15:46 +08:00
if numSegments > maxNumSegments {
maxNumSegments = numSegments
}
}
2021-06-26 16:08:11 +08:00
res := make ( [ ] int64 , 0 )
2021-04-15 15:15:46 +08:00
if len ( segmentIDs ) == 0 {
2021-10-11 09:54:37 +08:00
return res , nil
2021-04-15 15:15:46 +08:00
}
offset := 0
loopAll := false
for {
lastOffset := offset
if ! loopAll {
2021-06-30 17:48:19 +08:00
for nodeID := range nodes {
numSegments , _ := cluster . getNumSegments ( nodeID )
2021-06-15 12:41:40 +08:00
if numSegments >= maxNumSegments {
2021-04-15 15:15:46 +08:00
continue
}
2021-06-30 17:48:19 +08:00
res = append ( res , nodeID )
2021-04-15 15:15:46 +08:00
offset ++
if offset == len ( segmentIDs ) {
2021-10-11 09:54:37 +08:00
return res , nil
2021-04-15 15:15:46 +08:00
}
}
} else {
2021-06-30 17:48:19 +08:00
for nodeID := range nodes {
res = append ( res , nodeID )
2021-04-15 15:15:46 +08:00
offset ++
if offset == len ( segmentIDs ) {
2021-10-11 09:54:37 +08:00
return res , nil
2021-04-15 15:15:46 +08:00
}
}
}
if lastOffset == offset {
loopAll = true
}
}
}
2021-06-15 12:41:40 +08:00
2021-06-16 11:09:56 +08:00
func mergeVChannelInfo ( info1 * datapb . VchannelInfo , info2 * datapb . VchannelInfo ) * datapb . VchannelInfo {
2021-06-15 12:41:40 +08:00
collectionID := info1 . CollectionID
channelName := info1 . ChannelName
2021-06-16 11:09:56 +08:00
var seekPosition * internalpb . MsgPosition
if info1 . SeekPosition == nil || info2 . SeekPosition == nil {
seekPosition = & internalpb . MsgPosition {
ChannelName : channelName ,
}
} else {
seekPosition = info1 . SeekPosition
if info1 . SeekPosition . Timestamp > info2 . SeekPosition . Timestamp {
seekPosition = info2 . SeekPosition
}
2021-06-15 12:41:40 +08:00
}
2021-06-16 11:09:56 +08:00
checkPoints := make ( [ ] * datapb . SegmentInfo , 0 )
checkPoints = append ( checkPoints , info1 . UnflushedSegments ... )
checkPoints = append ( checkPoints , info2 . UnflushedSegments ... )
2021-06-15 12:41:40 +08:00
2021-10-08 19:09:12 +08:00
flushedSegments := make ( [ ] * datapb . SegmentInfo , 0 )
2021-06-15 12:41:40 +08:00
flushedSegments = append ( flushedSegments , info1 . FlushedSegments ... )
flushedSegments = append ( flushedSegments , info2 . FlushedSegments ... )
2021-06-16 11:09:56 +08:00
return & datapb . VchannelInfo {
CollectionID : collectionID ,
ChannelName : channelName ,
SeekPosition : seekPosition ,
UnflushedSegments : checkPoints ,
FlushedSegments : flushedSegments ,
2021-06-15 12:41:40 +08:00
}
}
2021-10-11 09:54:37 +08:00
2021-06-30 16:18:13 +08:00
func assignInternalTask ( ctx context . Context ,
collectionID UniqueID ,
2021-06-26 16:08:11 +08:00
parentTask task ,
2021-08-02 22:39:25 +08:00
meta Meta ,
2021-09-15 20:40:07 +08:00
cluster Cluster ,
2021-06-26 16:08:11 +08:00
loadSegmentRequests [ ] * querypb . LoadSegmentsRequest ,
2021-10-11 09:54:37 +08:00
watchDmChannelRequests [ ] * querypb . WatchDmChannelsRequest ,
wait bool ) error {
2021-06-30 16:18:13 +08:00
sp , _ := trace . StartSpanFromContext ( ctx )
defer sp . Finish ( )
2021-06-26 16:08:11 +08:00
segmentsToLoad := make ( [ ] UniqueID , 0 )
for _ , req := range loadSegmentRequests {
segmentsToLoad = append ( segmentsToLoad , req . Infos [ 0 ] . SegmentID )
}
channelsToWatch := make ( [ ] string , 0 )
for _ , req := range watchDmChannelRequests {
channelsToWatch = append ( channelsToWatch , req . Infos [ 0 ] . ChannelName )
}
2021-10-11 09:54:37 +08:00
segment2Nodes , err := shuffleSegmentsToQueryNode ( segmentsToLoad , cluster , wait , nil )
if err != nil {
log . Error ( "assignInternalTask: segment to node failed" , zap . Any ( "segments map" , segment2Nodes ) , zap . Int64 ( "collectionID" , collectionID ) )
return err
}
2021-07-13 14:16:00 +08:00
log . Debug ( "assignInternalTask: segment to node" , zap . Any ( "segments map" , segment2Nodes ) , zap . Int64 ( "collectionID" , collectionID ) )
2021-10-11 09:54:37 +08:00
watchRequest2Nodes , err := shuffleChannelsToQueryNode ( channelsToWatch , cluster , wait , nil )
if err != nil {
log . Error ( "assignInternalTask: watch request to node failed" , zap . Any ( "request map" , watchRequest2Nodes ) , zap . Int64 ( "collectionID" , collectionID ) )
return err
}
2021-06-26 16:08:11 +08:00
log . Debug ( "assignInternalTask: watch request to node" , zap . Any ( "request map" , watchRequest2Nodes ) , zap . Int64 ( "collectionID" , collectionID ) )
watchQueryChannelInfo := make ( map [ int64 ] bool )
2021-10-19 10:40:35 +08:00
node2Segments := make ( map [ int64 ] [ ] * querypb . LoadSegmentsRequest )
sizeCounts := make ( map [ int64 ] int )
2021-06-26 16:08:11 +08:00
for index , nodeID := range segment2Nodes {
2021-10-20 22:32:36 +08:00
sizeOfReq := getSizeOfLoadSegmentReq ( loadSegmentRequests [ index ] )
2021-06-26 16:08:11 +08:00
if _ , ok := node2Segments [ nodeID ] ; ! ok {
2021-10-19 10:40:35 +08:00
node2Segments [ nodeID ] = make ( [ ] * querypb . LoadSegmentsRequest , 0 )
node2Segments [ nodeID ] = append ( node2Segments [ nodeID ] , loadSegmentRequests [ index ] )
2021-10-20 22:32:36 +08:00
sizeCounts [ nodeID ] = sizeOfReq
} else {
2021-10-28 17:26:28 +08:00
if sizeCounts [ nodeID ] + sizeOfReq > MaxSendSizeToEtcd {
2021-10-20 22:32:36 +08:00
node2Segments [ nodeID ] = append ( node2Segments [ nodeID ] , loadSegmentRequests [ index ] )
sizeCounts [ nodeID ] = sizeOfReq
} else {
lastReq := node2Segments [ nodeID ] [ len ( node2Segments [ nodeID ] ) - 1 ]
lastReq . Infos = append ( lastReq . Infos , loadSegmentRequests [ index ] . Infos ... )
sizeCounts [ nodeID ] += sizeOfReq
}
2021-06-26 16:08:11 +08:00
}
2021-10-19 10:40:35 +08:00
2021-10-14 20:18:33 +08:00
if cluster . hasWatchedQueryChannel ( parentTask . traceCtx ( ) , nodeID , collectionID ) {
2021-06-26 16:08:11 +08:00
watchQueryChannelInfo [ nodeID ] = true
continue
}
watchQueryChannelInfo [ nodeID ] = false
}
for _ , nodeID := range watchRequest2Nodes {
2021-10-14 20:18:33 +08:00
if cluster . hasWatchedQueryChannel ( parentTask . traceCtx ( ) , nodeID , collectionID ) {
2021-06-26 16:08:11 +08:00
watchQueryChannelInfo [ nodeID ] = true
continue
}
watchQueryChannelInfo [ nodeID ] = false
}
2021-10-19 10:40:35 +08:00
for nodeID , loadSegmentsReqs := range node2Segments {
for _ , req := range loadSegmentsReqs {
ctx = opentracing . ContextWithSpan ( context . Background ( ) , sp )
2021-10-22 19:07:15 +08:00
req . DstNodeID = nodeID
2021-10-19 10:40:35 +08:00
baseTask := newBaseTask ( ctx , parentTask . getTriggerCondition ( ) )
baseTask . setParentTask ( parentTask )
loadSegmentTask := & loadSegmentTask {
baseTask : baseTask ,
LoadSegmentsRequest : req ,
meta : meta ,
cluster : cluster ,
excludeNodeIDs : [ ] int64 { } ,
}
parentTask . addChildTask ( loadSegmentTask )
log . Debug ( "assignInternalTask: add a loadSegmentTask childTask" , zap . Any ( "task" , loadSegmentTask ) )
2021-06-26 16:08:11 +08:00
}
}
for index , nodeID := range watchRequest2Nodes {
2021-06-30 16:18:13 +08:00
ctx = opentracing . ContextWithSpan ( context . Background ( ) , sp )
2021-06-26 16:08:11 +08:00
watchDmChannelReq := watchDmChannelRequests [ index ]
watchDmChannelReq . NodeID = nodeID
2021-10-14 20:18:33 +08:00
baseTask := newBaseTask ( ctx , parentTask . getTriggerCondition ( ) )
baseTask . setParentTask ( parentTask )
2021-10-18 21:34:47 +08:00
watchDmChannelTask := & watchDmChannelTask {
baseTask : baseTask ,
2021-06-26 16:08:11 +08:00
WatchDmChannelsRequest : watchDmChannelReq ,
meta : meta ,
cluster : cluster ,
2021-10-11 09:54:37 +08:00
excludeNodeIDs : [ ] int64 { } ,
2021-06-26 16:08:11 +08:00
}
2021-10-14 20:18:33 +08:00
parentTask . addChildTask ( watchDmChannelTask )
2021-06-26 16:08:11 +08:00
log . Debug ( "assignInternalTask: add a watchDmChannelTask childTask" , zap . Any ( "task" , watchDmChannelTask ) )
}
for nodeID , watched := range watchQueryChannelInfo {
if ! watched {
2021-06-30 16:18:13 +08:00
ctx = opentracing . ContextWithSpan ( context . Background ( ) , sp )
2021-10-22 19:07:15 +08:00
queryChannelInfo , err := meta . getQueryChannelInfoByID ( collectionID )
2021-09-29 09:56:04 +08:00
if err != nil {
return err
}
2021-06-26 16:08:11 +08:00
2021-10-14 20:18:33 +08:00
msgBase := proto . Clone ( parentTask . msgBase ( ) ) . ( * commonpb . MsgBase )
2021-07-13 14:16:00 +08:00
msgBase . MsgType = commonpb . MsgType_WatchQueryChannels
2021-06-26 16:08:11 +08:00
addQueryChannelRequest := & querypb . AddQueryChannelRequest {
2021-10-22 19:07:15 +08:00
Base : msgBase ,
NodeID : nodeID ,
CollectionID : collectionID ,
RequestChannelID : queryChannelInfo . QueryChannelID ,
ResultChannelID : queryChannelInfo . QueryResultChannelID ,
GlobalSealedSegments : queryChannelInfo . GlobalSealedSegments ,
SeekPosition : queryChannelInfo . SeekPosition ,
2021-06-26 16:08:11 +08:00
}
2021-10-14 20:18:33 +08:00
baseTask := newBaseTask ( ctx , parentTask . getTriggerCondition ( ) )
baseTask . setParentTask ( parentTask )
2021-10-18 21:34:47 +08:00
watchQueryChannelTask := & watchQueryChannelTask {
baseTask : baseTask ,
2021-06-26 16:08:11 +08:00
AddQueryChannelRequest : addQueryChannelRequest ,
cluster : cluster ,
}
2021-10-14 20:18:33 +08:00
parentTask . addChildTask ( watchQueryChannelTask )
2021-06-26 16:08:11 +08:00
log . Debug ( "assignInternalTask: add a watchQueryChannelTask childTask" , zap . Any ( "task" , watchQueryChannelTask ) )
}
}
2021-09-29 09:56:04 +08:00
return nil
2021-06-26 16:08:11 +08:00
}
2021-10-19 10:40:35 +08:00
func getSizeOfLoadSegmentReq ( req * querypb . LoadSegmentsRequest ) int {
2021-10-20 19:27:27 +08:00
return proto . Size ( req )
2021-10-19 10:40:35 +08:00
}