// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package querynode

import (
	"context"
	"fmt"
	"math"

	"github.com/cockroachdb/errors"
	"go.uber.org/zap"

	"github.com/milvus-io/milvus-proto/go-api/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/msgpb"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	queryPb "github.com/milvus-io/milvus/internal/proto/querypb"
	"github.com/milvus-io/milvus/internal/util/commonpbutil"
	"github.com/milvus-io/milvus/internal/util/funcutil"
	"github.com/milvus-io/milvus/internal/util/paramtable"
)

type watchDmChannelsTask struct {
	baseTask
	req  *queryPb.WatchDmChannelsRequest
	node *QueryNode
}

// Execute registers the collection meta, shard clusters, growing segments and
// flow graphs for the requested DM channels, then starts the flow graphs.
func (w *watchDmChannelsTask) Execute(ctx context.Context) (err error) {
	collectionID := w.req.CollectionID
	partitionIDs := w.req.GetPartitionIDs()

	lType := w.req.GetLoadMeta().GetLoadType()
	if lType == queryPb.LoadType_UnKnownType {
		// if no partitionID is specified, the load type is load collection
		if len(partitionIDs) != 0 {
			lType = queryPb.LoadType_LoadPartition
		} else {
			lType = queryPb.LoadType_LoadCollection
		}
	}

	// get all vChannels and their physical channels
	var vChannels []Channel
	VPChannels := make(map[string]string) // map[vChannel]pChannel
	for _, info := range w.req.Infos {
		v := info.ChannelName
		p := funcutil.ToPhysicalChannel(info.ChannelName)
		vChannels = append(vChannels, v)
		VPChannels[v] = p
	}

	log := log.With(
		zap.Int64("collectionID", w.req.GetCollectionID()),
		zap.Strings("vChannels", vChannels),
		zap.Int64("replicaID", w.req.GetReplicaID()),
	)

	if len(VPChannels) != len(vChannels) {
		return errors.New("get physical channels failed, illegal channel length, collectionID = " + fmt.Sprintln(collectionID))
	}

	log.Info("Starting WatchDmChannels ...",
		zap.String("loadType", lType.String()),
		zap.String("collectionName", w.req.GetSchema().GetName()),
	)

	// init collection meta
	coll := w.node.metaReplica.addCollection(collectionID, w.req.Schema)

	// filter out the already existing channels
	vChannels = coll.AddChannels(vChannels, VPChannels)
	defer func() {
		if err != nil {
			for _, vChannel := range vChannels {
				coll.removeVChannel(vChannel)
			}
		}
	}()

	if len(vChannels) == 0 {
		log.Warn("all channels have been added before, ignore watch dml requests")
		return nil
	}

	// add shard cluster
	for _, vchannel := range vChannels {
		w.node.ShardClusterService.addShardCluster(w.req.GetCollectionID(), w.req.GetReplicaID(), vchannel, w.req.GetVersion())
	}

	// release shard clusters if watch dmChannels failed
	defer func() {
		if err != nil {
			for _, vchannel := range vChannels {
				w.node.ShardClusterService.releaseShardCluster(vchannel)
			}
		}
	}()

	unFlushedSegmentIDs, err := w.LoadGrowingSegments(ctx, collectionID)
	if err != nil {
		return errors.Wrap(err, "failed to load growing segments")
	}

	// remove growing segments if watch dmChannels failed
	defer func() {
		if err != nil {
			for _, segmentID := range unFlushedSegmentIDs {
				w.node.metaReplica.removeSegment(segmentID, segmentTypeGrowing)
			}
		}
	}()

	channel2FlowGraph, err := w.initFlowGraph(collectionID, vChannels)
	if err != nil {
		return errors.Wrap(err, "failed to init flowgraph")
	}

	coll.setLoadType(lType)

	log.Info("watchDMChannel, init replica done")

	// create tSafe for each channel
	for _, channel := range vChannels {
		w.node.tSafeReplica.addTSafe(channel)
	}

	// add tSafe watch in query shard if exists
	for _, dmlChannel := range vChannels {
		// the error returned here can be ignored
		w.node.queryShardService.addQueryShard(collectionID, dmlChannel, w.req.GetReplicaID())
	}

	// start flow graphs
	for _, fg := range channel2FlowGraph {
		fg.flowGraph.Start()
	}

	log.Info("WatchDmChannels done")
	return nil
}

// PostExecute sets up the first version for each ShardCluster and does not do
// GC if it fails.
func (w *watchDmChannelsTask) PostExecute(ctx context.Context) error {
	// setup shard cluster version
	var releasedChannels []string
	for _, info := range w.req.GetInfos() {
		sc, ok := w.node.ShardClusterService.getShardCluster(info.GetChannelName())
		// shard cluster may be released by a release task
		if !ok {
			releasedChannels = append(releasedChannels, info.GetChannelName())
			continue
		}
		sc.SetupFirstVersion()
	}
	if len(releasedChannels) > 0 {
		// no clean up needed, release shall do the job
		log.Warn("WatchDmChannels failed, shard cluster may be released",
			zap.Strings("releasedChannels", releasedChannels),
		)
		return fmt.Errorf("failed to watch %v, shard cluster may be released", releasedChannels)
	}
	return nil
}
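
// LoadGrowingSegments loads the unflushed (growing) segments referenced by the
// watch request into the QueryNode and returns the IDs of the loaded segments.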
func (w *watchDmChannelsTask) LoadGrowingSegments(ctx context.Context, collectionID UniqueID) ([]UniqueID, error) {
	// load growing segments
	unFlushedSegments := make([]*queryPb.SegmentLoadInfo, 0)
	unFlushedSegmentIDs := make([]UniqueID, 0)
	for _, info := range w.req.Infos {
		for _, ufInfoID := range info.GetUnflushedSegmentIds() {
			// an unFlushed segment may not have binlogs yet, skip loading it
			ufInfo := w.req.GetSegmentInfos()[ufInfoID]
			if ufInfo == nil {
				log.Warn("an unflushed segment is not found in segment infos", zap.Int64("segment ID", ufInfoID))
				continue
			}
			if len(ufInfo.GetBinlogs()) > 0 {
				unFlushedSegments = append(unFlushedSegments, &queryPb.SegmentLoadInfo{
					SegmentID:     ufInfo.ID,
					PartitionID:   ufInfo.PartitionID,
					CollectionID:  ufInfo.CollectionID,
					BinlogPaths:   ufInfo.Binlogs,
					NumOfRows:     ufInfo.NumOfRows,
					Statslogs:     ufInfo.Statslogs,
					Deltalogs:     ufInfo.Deltalogs,
					InsertChannel: ufInfo.InsertChannel,
				})
				unFlushedSegmentIDs = append(unFlushedSegmentIDs, ufInfo.GetID())
			} else {
				log.Info("skip segment whose binlog is empty", zap.Int64("segmentID", ufInfo.ID))
			}
		}
	}
	req := &queryPb.LoadSegmentsRequest{
		Base: commonpbutil.NewMsgBase(
			commonpbutil.WithMsgType(commonpb.MsgType_LoadSegments),
			commonpbutil.WithMsgID(w.req.Base.MsgID), // use parent task's msgID
		),
		Infos:        unFlushedSegments,
		CollectionID: collectionID,
		Schema:       w.req.GetSchema(),
		LoadMeta:     w.req.GetLoadMeta(),
	}

	// update partition info from unFlushedSegments and loadMeta
	for _, info := range req.Infos {
		err := w.node.metaReplica.addPartition(collectionID, info.PartitionID)
		if err != nil {
			return nil, err
		}
	}
	for _, partitionID := range req.GetLoadMeta().GetPartitionIDs() {
		err := w.node.metaReplica.addPartition(collectionID, partitionID)
		if err != nil {
			return nil, err
		}
	}

	log.Info("loading growing segments in WatchDmChannels...",
		zap.Int64("collectionID", collectionID),
		zap.Int64s("unFlushedSegmentIDs", unFlushedSegmentIDs),
	)
	_, err := w.node.loader.LoadSegment(w.ctx, req, segmentTypeGrowing)
	if err != nil {
		log.Warn("failed to load segment", zap.Int64("collection", collectionID), zap.Error(err))
		return nil, err
	}
	log.Info("successfully loaded growing segments in WatchDmChannels",
		zap.Int64("collectionID", collectionID),
		zap.Int64s("unFlushedSegmentIDs", unFlushedSegmentIDs),
	)
	return unFlushedSegmentIDs, nil
}
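
// initFlowGraph groups channels by seek position, registers excluded segments
// (unflushed, flushed and dropped) so that stale messages can be filtered out,
// and adds a flow graph for each DM channel.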
func (w *watchDmChannelsTask) initFlowGraph(collectionID UniqueID, vChannels []Channel) (map[string]*queryNodeFlowGraph, error) {
	// So far, we don't support enabling each node with two different channels
	consumeSubName := funcutil.GenChannelSubName(Params.CommonCfg.QueryNodeSubName.GetValue(), collectionID, paramtable.GetNodeID())

	// group channels by seek position
	channel2SeekPosition := make(map[string]*msgpb.MsgPosition)
	for _, info := range w.req.Infos {
		if info.SeekPosition != nil && len(info.SeekPosition.MsgID) != 0 {
			info.SeekPosition.MsgGroup = consumeSubName
		}
		channel2SeekPosition[info.ChannelName] = info.SeekPosition
	}
	log.Info("watchDMChannel, group channels done", zap.Int64("collectionID", collectionID))

	// add excluded segments for unFlushed segments,
	// unFlushed segments before the check point should be filtered out.
	unFlushedCheckPointInfos := make([]*datapb.SegmentInfo, 0)
	for _, info := range w.req.Infos {
		for _, ufsID := range info.GetUnflushedSegmentIds() {
			unFlushedCheckPointInfos = append(unFlushedCheckPointInfos, w.req.SegmentInfos[ufsID])
		}
	}
	w.node.metaReplica.addExcludedSegments(collectionID, unFlushedCheckPointInfos)
	unflushedSegmentIDs := make([]UniqueID, len(unFlushedCheckPointInfos))
	for i, segInfo := range unFlushedCheckPointInfos {
		unflushedSegmentIDs[i] = segInfo.GetID()
	}
	log.Info("watchDMChannel, add check points info for unflushed segments done",
		zap.Int64("collectionID", collectionID),
		zap.Any("unflushedSegmentIDs", unflushedSegmentIDs),
	)

	// add excluded segments for flushed segments,
	// flushed segments with a later check point than the seekPosition should be filtered out.
	flushedCheckPointInfos := make([]*datapb.SegmentInfo, 0)
	for _, info := range w.req.Infos {
		for _, flushedSegmentID := range info.GetFlushedSegmentIds() {
			flushedSegment := w.req.SegmentInfos[flushedSegmentID]
			for _, position := range channel2SeekPosition {
				if flushedSegment.DmlPosition != nil &&
					flushedSegment.DmlPosition.ChannelName == position.ChannelName &&
					flushedSegment.DmlPosition.Timestamp > position.Timestamp {
					flushedCheckPointInfos = append(flushedCheckPointInfos, flushedSegment)
				}
			}
		}
	}
	w.node.metaReplica.addExcludedSegments(collectionID, flushedCheckPointInfos)
	flushedSegmentIDs := make([]UniqueID, len(flushedCheckPointInfos))
	for i, segInfo := range flushedCheckPointInfos {
		flushedSegmentIDs[i] = segInfo.GetID()
	}
	log.Info("watchDMChannel, add check points info for flushed segments done",
		zap.Int64("collectionID", collectionID),
		zap.Any("flushedSegmentIDs", flushedSegmentIDs),
	)

	// add excluded segments for dropped segments,
	// exclude all msgs with a dropped segment id.
	// DO NOT refer to dropped segment info, see issue https://github.com/milvus-io/milvus/issues/19704
	var droppedCheckPointInfos []*datapb.SegmentInfo
	for _, info := range w.req.Infos {
		for _, droppedSegmentID := range info.GetDroppedSegmentIds() {
			droppedCheckPointInfos = append(droppedCheckPointInfos, &datapb.SegmentInfo{
				ID:            droppedSegmentID,
				CollectionID:  collectionID,
				InsertChannel: info.GetChannelName(),
				DmlPosition: &msgpb.MsgPosition{
					ChannelName: info.GetChannelName(),
					Timestamp:   math.MaxUint64,
				},
			})
		}
	}
	w.node.metaReplica.addExcludedSegments(collectionID, droppedCheckPointInfos)
	droppedSegmentIDs := make([]UniqueID, len(droppedCheckPointInfos))
	for i, segInfo := range droppedCheckPointInfos {
		droppedSegmentIDs[i] = segInfo.GetID()
	}
	log.Info("watchDMChannel, add check points info for dropped segments done",
		zap.Int64("collectionID", collectionID),
		zap.Any("droppedSegmentIDs", droppedSegmentIDs),
	)

	// add flow graphs
	channel2FlowGraph, err := w.node.dataSyncService.addFlowGraphsForDMLChannels(collectionID, channel2SeekPosition)
	if err != nil {
		log.Warn("watchDMChannel, add flowGraph for dmChannels failed", zap.Int64("collectionID", collectionID), zap.Strings("vChannels", vChannels), zap.Error(err))
		return nil, err
	}
	log.Info("watchDMChannel, add flowGraph for dmChannels success", zap.Int64("collectionID", collectionID), zap.Strings("vChannels", vChannels))
	return channel2FlowGraph, nil
}