mirror of https://gitee.com/milvus-io/milvus.git
synced 2024-12-01 11:29:48 +08:00
28e949cf31
The original GetVChanPositions was specialized for QueryCoord; the recovery info needed by DataNode shouldn't take the `indexed` state into account. This PR splits GetVChanPositions into GetDataVChanPositions and GetQueryVChanPositions. See also: #19653
Signed-off-by: yangxuan <xuan.yang@zilliz.com>
296 lines
10 KiB
Go
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package datacoord

import (
	"context"

	"github.com/milvus-io/milvus/api/commonpb"
	"github.com/milvus-io/milvus/internal/log"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/proto/internalpb"
	"github.com/milvus-io/milvus/internal/util/funcutil"
	"github.com/milvus-io/milvus/internal/util/typeutil"
	"go.uber.org/zap"
)
// Handler handles channel-related methods for ChannelManager.
type Handler interface {
	// GetQueryVChanPositions gets the recovery information of a channel that QueryCoord needs.
	GetQueryVChanPositions(channel *channel, partitionID UniqueID) *datapb.VchannelInfo
	// GetDataVChanPositions gets the recovery information of a channel that DataNode needs.
	GetDataVChanPositions(channel *channel, partitionID UniqueID) *datapb.VchannelInfo
	// CheckShouldDropChannel returns whether the channel is marked to be removed.
	CheckShouldDropChannel(channel string) bool
	// FinishDropChannel cleans up the remove flag for the channel.
	FinishDropChannel(channel string)
}
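// Both position getters return a datapb.VchannelInfo, the recovery snapshot
// of one virtual channel. A minimal sketch with hypothetical IDs (field
// semantics follow the methods below; for GetQueryVChanPositions the
// "flushed" list actually holds indexed segments):
//
//	info := &datapb.VchannelInfo{
//		CollectionID:        1,
//		ChannelName:         "vchan-1",
//		SeekPosition:        pos,           // earliest position a consumer must replay from
//		FlushedSegmentIds:   []int64{100},  // sealed (or, for QueryCoord, indexed) segments
//		UnflushedSegmentIds: []int64{101},  // growing (or unindexed) segments
//		DroppedSegmentIds:   []int64{99},
//	}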
// ServerHandler is a helper of Server
type ServerHandler struct {
	s *Server
}

// newServerHandler creates a new ServerHandler
func newServerHandler(s *Server) *ServerHandler {
	return &ServerHandler{s: s}
}
// GetDataVChanPositions gets vchannel latest positions with provided dml channel names for DataNode.
func (h *ServerHandler) GetDataVChanPositions(channel *channel, partitionID UniqueID) *datapb.VchannelInfo {
	segments := h.s.meta.SelectSegments(func(s *SegmentInfo) bool {
		return s.InsertChannel == channel.Name
	})
	log.Info("GetDataVChanPositions",
		zap.Int64("collectionID", channel.CollectionID),
		zap.String("channel", channel.Name),
		zap.Int("numOfSegments", len(segments)),
	)
	var (
		flushedIDs   = make(typeutil.UniqueSet)
		unflushedIDs = make(typeutil.UniqueSet)
		droppedIDs   = make(typeutil.UniqueSet)
		seekPosition *internalpb.MsgPosition
	)
	for _, s := range segments {
		if (partitionID > allPartitionID && s.PartitionID != partitionID) ||
			(s.GetStartPosition() == nil && s.GetDmlPosition() == nil) {
			continue
		}
		if s.GetIsImporting() {
			// Skip bulk load segments.
			continue
		}

		if s.GetState() == commonpb.SegmentState_Dropped {
			droppedIDs.Insert(s.GetID())
			continue
		} else if s.GetState() == commonpb.SegmentState_Flushing || s.GetState() == commonpb.SegmentState_Flushed {
			flushedIDs.Insert(s.GetID())
		} else {
			unflushedIDs.Insert(s.GetID())
		}

		var segmentPosition *internalpb.MsgPosition
		if s.GetDmlPosition() != nil {
			segmentPosition = s.GetDmlPosition()
		} else {
			segmentPosition = s.GetStartPosition()
		}
		if seekPosition == nil || segmentPosition.Timestamp < seekPosition.Timestamp {
			seekPosition = segmentPosition
		}
	}
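	// Why the minimum timestamp wins (a hedged worked example): if segment A
	// checkpoints at ts=100 and segment B at ts=80, replaying the channel from
	// ts=80 re-covers every segment's un-persisted tail, while seeking to
	// ts=100 would silently skip B's data between 80 and 100.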
	// use collection start position when segment position is not found
	if seekPosition == nil {
		if channel.StartPositions == nil {
			collection := h.GetCollection(h.s.ctx, channel.CollectionID)
			if collection != nil {
				seekPosition = getCollectionStartPosition(channel.Name, collection)
			}
		} else {
			// use the passed-in start positions; no need to ask rootcoord
			seekPosition = toMsgPosition(channel.Name, channel.StartPositions)
		}
	}
	return &datapb.VchannelInfo{
		CollectionID:        channel.CollectionID,
		ChannelName:         channel.Name,
		SeekPosition:        seekPosition,
		FlushedSegmentIds:   flushedIDs.Collect(),
		UnflushedSegmentIds: unflushedIDs.Collect(),
		DroppedSegmentIds:   droppedIDs.Collect(),
	}
}
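// Usage sketch with hypothetical values. Judging from the partition filter
// above, allPartitionID is the sentinel meaning "no partition filter", and
// any larger, valid ID restricts the scan to that single partition:
//
//	all := handler.GetDataVChanPositions(ch, allPartitionID) // every partition
//	one := handler.GetDataVChanPositions(ch, 2001)           // one partition (hypothetical ID)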
// GetQueryVChanPositions gets vchannel latest positions with provided dml channel names for QueryCoord.
// We expect QueryCoord to load only indexed segments, so the flushed segments below are actually the indexed segments,
// and the unflushed segments are the segments without an index, even if they are flushed.
func (h *ServerHandler) GetQueryVChanPositions(channel *channel, partitionID UniqueID) *datapb.VchannelInfo {
	// cannot use GetSegmentsByChannel since dropped segments are needed here
	segments := h.s.meta.SelectSegments(func(s *SegmentInfo) bool {
		return s.InsertChannel == channel.Name
	})
	segmentInfos := make(map[int64]*SegmentInfo)
	indexedSegments := FilterInIndexedSegments(h.s.meta, h.s.indexCoord, segments...)
	indexed := make(typeutil.UniqueSet)
	for _, segment := range indexedSegments {
		indexed.Insert(segment.GetID())
	}
	log.Info("GetQueryVChanPositions",
		zap.Int64("collectionID", channel.CollectionID),
		zap.String("channel", channel.Name),
		zap.Int("numOfSegments", len(segments)),
	)
	var (
		indexedIDs   = make(typeutil.UniqueSet)
		unIndexedIDs = make(typeutil.UniqueSet)
		droppedIDs   = make(typeutil.UniqueSet)
		seekPosition *internalpb.MsgPosition
	)
	for _, s := range segments {
		if (partitionID > allPartitionID && s.PartitionID != partitionID) ||
			(s.GetStartPosition() == nil && s.GetDmlPosition() == nil) {
			continue
		}
		if s.GetIsImporting() {
			// Skip bulk load segments.
			continue
		}
		segmentInfos[s.GetID()] = s
		if s.GetState() == commonpb.SegmentState_Dropped {
			droppedIDs.Insert(s.GetID())
		} else if indexed.Contain(s.GetID()) {
			indexedIDs.Insert(s.GetID())
		} else {
			unIndexedIDs.Insert(s.GetID())
		}
	}
	for id := range unIndexedIDs {
		// A compaction result may not be indexed yet; if every segment it was
		// compacted from is indexed, serve those indexed segments instead.
		if len(segmentInfos[id].GetCompactionFrom()) > 0 &&
			indexed.Contain(segmentInfos[id].GetCompactionFrom()...) {
			unIndexedIDs.Remove(id)
			indexedIDs.Insert(segmentInfos[id].GetCompactionFrom()...)
			droppedIDs.Remove(segmentInfos[id].GetCompactionFrom()...)
		}
	}
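	// Worked example (hypothetical IDs): indexed segments 1 and 2 were
	// compacted into segment 3, which has no index yet. Segment 3 lands in
	// unIndexedIDs with CompactionFrom = [1, 2], so the loop above removes 3,
	// re-inserts 1 and 2 into indexedIDs, and takes them back out of
	// droppedIDs; QueryCoord then loads the indexed parents instead of the
	// unindexed child.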
	for id := range indexedIDs {
		var segmentPosition *internalpb.MsgPosition
		segment := segmentInfos[id]
		if segment.GetDmlPosition() != nil {
			segmentPosition = segment.GetDmlPosition()
		} else {
			segmentPosition = segment.GetStartPosition()
		}

		if seekPosition == nil || segmentPosition.Timestamp < seekPosition.Timestamp {
			seekPosition = segmentPosition
		}
	}
	for id := range unIndexedIDs {
		var segmentPosition *internalpb.MsgPosition
		segment := segmentInfos[id]
		if segment.GetDmlPosition() != nil {
			segmentPosition = segment.GetDmlPosition()
		} else {
			segmentPosition = segment.GetStartPosition()
		}

		if seekPosition == nil || segmentPosition.Timestamp < seekPosition.Timestamp {
			seekPosition = segmentPosition
		}
	}
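	// Both loops feed the same minimum: the final seekPosition is the earliest
	// checkpoint across indexed and unindexed segments alike, so replaying
	// from it covers everything QueryCoord is told to subscribe to.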
	// use collection start position when segment position is not found
	if seekPosition == nil {
		if channel.StartPositions == nil {
			collection := h.GetCollection(h.s.ctx, channel.CollectionID)
			if collection != nil {
				seekPosition = getCollectionStartPosition(channel.Name, collection)
			}
		} else {
			// use the passed-in start positions; no need to ask rootcoord
			seekPosition = toMsgPosition(channel.Name, channel.StartPositions)
		}
	}
	return &datapb.VchannelInfo{
		CollectionID:        channel.CollectionID,
		ChannelName:         channel.Name,
		SeekPosition:        seekPosition,
		FlushedSegmentIds:   indexedIDs.Collect(),
		UnflushedSegmentIds: unIndexedIDs.Collect(),
		DroppedSegmentIds:   droppedIDs.Collect(),
	}
}
func getCollectionStartPosition(channel string, collectionInfo *collectionInfo) *internalpb.MsgPosition {
	return toMsgPosition(channel, collectionInfo.StartPositions)
}

func toMsgPosition(channel string, startPositions []*commonpb.KeyDataPair) *internalpb.MsgPosition {
	for _, sp := range startPositions {
		if sp.GetKey() != funcutil.ToPhysicalChannel(channel) {
			continue
		}
		return &internalpb.MsgPosition{
			ChannelName: channel,
			MsgID:       sp.GetData(),
		}
	}
	return nil
}
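// Note: startPositions are keyed by physical channel name while the caller
// passes a virtual channel name, hence the funcutil.ToPhysicalChannel mapping
// above; the returned position keeps the virtual name so the consumer seeks
// on the right vchannel.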
// trimSegmentInfo returns a shallow copy of datapb.SegmentInfo and sets ALL binlog info to nil
func trimSegmentInfo(info *datapb.SegmentInfo) *datapb.SegmentInfo {
	return &datapb.SegmentInfo{
		ID:             info.ID,
		CollectionID:   info.CollectionID,
		PartitionID:    info.PartitionID,
		InsertChannel:  info.InsertChannel,
		NumOfRows:      info.NumOfRows,
		State:          info.State,
		MaxRowNum:      info.MaxRowNum,
		LastExpireTime: info.LastExpireTime,
		StartPosition:  info.StartPosition,
		DmlPosition:    info.DmlPosition,
	}
}
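// The trim is presumably about payload size: binlog, statslog, and deltalog
// paths can dominate a SegmentInfo, while recovery-info consumers only need
// identity, row counts, state, and positions.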
// GetCollection returns collection info with specified collection id
func (h *ServerHandler) GetCollection(ctx context.Context, collectionID UniqueID) *collectionInfo {
	coll := h.s.meta.GetCollection(collectionID)
	if coll != nil {
		return coll
	}
	err := h.s.loadCollectionFromRootCoord(ctx, collectionID)
	if err != nil {
		log.Warn("failed to load collection from rootcoord", zap.Int64("collectionID", collectionID), zap.Error(err))
	}

	return h.s.meta.GetCollection(collectionID)
}
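// GetCollection acts as a read-through cache: hit local meta first, otherwise
// pull the collection from rootcoord into meta and read it again. If the load
// fails, the second lookup simply returns nil, and callers must tolerate that
// (as the position getters above do).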
// CheckShouldDropChannel returns whether the specified channel is marked to be removed
func (h *ServerHandler) CheckShouldDropChannel(channel string) bool {
	/*
		segments := h.s.meta.GetSegmentsByChannel(channel)
		for _, segment := range segments {
			if segment.GetStartPosition() != nil && // filter empty segment
				// FIXME: we filter compaction generated segments
				// because datanode may not know the segment due to the network lag or
				// datacoord crash when handling CompleteCompaction.
				// FIXME: cancel this limitation for #12265
				// need to change a unified DropAndFlush to solve the root problem
				//len(segment.CompactionFrom) == 0 &&
				segment.GetState() != commonpb.SegmentState_Dropped {
				return false
			}
		}
		return false*/
	return h.s.meta.catalog.IsChannelDropped(h.s.ctx, channel)
}
// FinishDropChannel cleans up the remove flag for channels
// this function is a wrapper of server.meta.FinishDropChannel
func (h *ServerHandler) FinishDropChannel(channel string) {
	h.s.meta.catalog.DropChannel(h.s.ctx, channel)
}
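// Typical lifecycle sketch (inferred from the interface, not spelled out in
// this file): ChannelManager sees CheckShouldDropChannel(ch) return true,
// releases the channel's resources, then calls FinishDropChannel(ch) to clear
// the drop flag from the catalog.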