mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-06 05:49:24 +08:00
eaabe0293b
Related to #28736 #28748 See also #27675 Previous PR: #28646 This PR fixes `SegmentNotFound` issue when compaction happens multiple times and the buffer of first generation segment is sync due to stale policy Now the `CompactSegments` API of metacache shall update the compactTo field of segmentInfo if the compactTo segment is also compacted to keep the bloodline clean Also, add the `CompactedSegment` SyncPolicy to sync the compacted segment asap to keep metacache clean Now the `SyncPolicy` is an interface instead of a function type so that when it selects some segments to sync, we colud log the reason and target segment Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
240 lines
7.2 KiB
Go
240 lines
7.2 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package metacache
|
|
|
|
import (
|
|
"sync"
|
|
|
|
"github.com/samber/lo"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
)
|
|
|
|
type MetaCache interface {
|
|
// Collection returns collection id of metacache.
|
|
Collection() int64
|
|
// Schema returns collection schema.
|
|
Schema() *schemapb.CollectionSchema
|
|
// AddSegment adds a segment from segment info.
|
|
AddSegment(segInfo *datapb.SegmentInfo, factory PkStatsFactory, actions ...SegmentAction)
|
|
// UpdateSegments applies action to segment(s) satisfy the provided filters.
|
|
UpdateSegments(action SegmentAction, filters ...SegmentFilter)
|
|
// RemoveSegments removes segments matches the provided filter.
|
|
RemoveSegments(filters ...SegmentFilter) []int64
|
|
// CompactSegments transfers compaction segment results inside the metacache.
|
|
CompactSegments(newSegmentID, partitionID int64, numRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64)
|
|
// GetSegmentsBy returns segments statify the provided filters.
|
|
GetSegmentsBy(filters ...SegmentFilter) []*SegmentInfo
|
|
// GetSegmentByID returns segment with provided segment id if exists.
|
|
GetSegmentByID(id int64, filters ...SegmentFilter) (*SegmentInfo, bool)
|
|
// GetSegmentIDs returns ids of segments which satifiy the provided filters.
|
|
GetSegmentIDsBy(filters ...SegmentFilter) []int64
|
|
// PredictSegments returns the segment ids which may contain the provided primary key.
|
|
PredictSegments(pk storage.PrimaryKey, filters ...SegmentFilter) ([]int64, bool)
|
|
}
|
|
|
|
var _ MetaCache = (*metaCacheImpl)(nil)
|
|
|
|
type PkStatsFactory func(vchannel *datapb.SegmentInfo) *BloomFilterSet
|
|
|
|
type metaCacheImpl struct {
|
|
collectionID int64
|
|
vChannelName string
|
|
segmentInfos map[int64]*SegmentInfo
|
|
schema *schemapb.CollectionSchema
|
|
mu sync.RWMutex
|
|
}
|
|
|
|
func NewMetaCache(info *datapb.ChannelWatchInfo, factory PkStatsFactory) MetaCache {
|
|
vchannel := info.GetVchan()
|
|
cache := &metaCacheImpl{
|
|
collectionID: vchannel.GetCollectionID(),
|
|
vChannelName: vchannel.GetChannelName(),
|
|
segmentInfos: make(map[int64]*SegmentInfo),
|
|
schema: info.GetSchema(),
|
|
}
|
|
|
|
cache.init(vchannel, factory)
|
|
return cache
|
|
}
|
|
|
|
func (c *metaCacheImpl) init(vchannel *datapb.VchannelInfo, factory PkStatsFactory) {
|
|
for _, seg := range vchannel.FlushedSegments {
|
|
c.segmentInfos[seg.GetID()] = NewSegmentInfo(seg, factory(seg))
|
|
}
|
|
|
|
for _, seg := range vchannel.UnflushedSegments {
|
|
c.segmentInfos[seg.GetID()] = NewSegmentInfo(seg, factory(seg))
|
|
}
|
|
}
|
|
|
|
// Collection returns collection id of metacache.
|
|
func (c *metaCacheImpl) Collection() int64 {
|
|
return c.collectionID
|
|
}
|
|
|
|
// Schema returns collection schema.
|
|
func (c *metaCacheImpl) Schema() *schemapb.CollectionSchema {
|
|
return c.schema
|
|
}
|
|
|
|
// AddSegment adds a segment from segment info.
|
|
func (c *metaCacheImpl) AddSegment(segInfo *datapb.SegmentInfo, factory PkStatsFactory, actions ...SegmentAction) {
|
|
segment := NewSegmentInfo(segInfo, factory(segInfo))
|
|
|
|
for _, action := range actions {
|
|
action(segment)
|
|
}
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
c.segmentInfos[segInfo.GetID()] = segment
|
|
}
|
|
|
|
func (c *metaCacheImpl) CompactSegments(newSegmentID, partitionID int64, numOfRows int64, bfs *BloomFilterSet, oldSegmentIDs ...int64) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
compactTo := NullSegment
|
|
if numOfRows > 0 {
|
|
compactTo = newSegmentID
|
|
if _, ok := c.segmentInfos[newSegmentID]; !ok {
|
|
c.segmentInfos[newSegmentID] = &SegmentInfo{
|
|
segmentID: newSegmentID,
|
|
partitionID: partitionID,
|
|
state: commonpb.SegmentState_Flushed,
|
|
startPosRecorded: true,
|
|
bfs: bfs,
|
|
}
|
|
}
|
|
log.Info("add compactTo segment info metacache", zap.Int64("segmentID", compactTo))
|
|
}
|
|
|
|
oldSet := typeutil.NewSet(oldSegmentIDs...)
|
|
for _, segment := range c.segmentInfos {
|
|
if oldSet.Contain(segment.segmentID) ||
|
|
oldSet.Contain(segment.compactTo) {
|
|
updated := segment.Clone()
|
|
updated.compactTo = compactTo
|
|
c.segmentInfos[segment.segmentID] = updated
|
|
log.Info("update segment compactTo",
|
|
zap.Int64("segmentID", segment.segmentID),
|
|
zap.Int64("originalCompactTo", segment.compactTo),
|
|
zap.Int64("compactTo", compactTo))
|
|
}
|
|
}
|
|
}
|
|
|
|
func (c *metaCacheImpl) RemoveSegments(filters ...SegmentFilter) []int64 {
|
|
if len(filters) == 0 {
|
|
log.Warn("remove segment without filters is not allowed", zap.Stack("callstack"))
|
|
return nil
|
|
}
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
filter := c.mergeFilters(filters...)
|
|
|
|
var ids []int64
|
|
for segID, info := range c.segmentInfos {
|
|
if filter(info) {
|
|
ids = append(ids, segID)
|
|
delete(c.segmentInfos, segID)
|
|
}
|
|
}
|
|
return ids
|
|
}
|
|
|
|
func (c *metaCacheImpl) GetSegmentsBy(filters ...SegmentFilter) []*SegmentInfo {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
|
|
filter := c.mergeFilters(filters...)
|
|
|
|
var segments []*SegmentInfo
|
|
for _, info := range c.segmentInfos {
|
|
if filter(info) {
|
|
segments = append(segments, info)
|
|
}
|
|
}
|
|
return segments
|
|
}
|
|
|
|
// GetSegmentByID returns segment with provided segment id if exists.
|
|
func (c *metaCacheImpl) GetSegmentByID(id int64, filters ...SegmentFilter) (*SegmentInfo, bool) {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
|
|
segment, ok := c.segmentInfos[id]
|
|
if !ok {
|
|
return nil, false
|
|
}
|
|
if !c.mergeFilters(filters...)(segment) {
|
|
return nil, false
|
|
}
|
|
return segment, ok
|
|
}
|
|
|
|
func (c *metaCacheImpl) GetSegmentIDsBy(filters ...SegmentFilter) []int64 {
|
|
segments := c.GetSegmentsBy(filters...)
|
|
return lo.Map(segments, func(info *SegmentInfo, _ int) int64 { return info.SegmentID() })
|
|
}
|
|
|
|
func (c *metaCacheImpl) UpdateSegments(action SegmentAction, filters ...SegmentFilter) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
filter := c.mergeFilters(filters...)
|
|
|
|
for id, info := range c.segmentInfos {
|
|
if !filter(info) {
|
|
continue
|
|
}
|
|
nInfo := info.Clone()
|
|
action(nInfo)
|
|
c.segmentInfos[id] = nInfo
|
|
}
|
|
}
|
|
|
|
func (c *metaCacheImpl) PredictSegments(pk storage.PrimaryKey, filters ...SegmentFilter) ([]int64, bool) {
|
|
var predicts []int64
|
|
segments := c.GetSegmentsBy(filters...)
|
|
for _, segment := range segments {
|
|
if segment.GetBloomFilterSet().PkExists(pk) {
|
|
predicts = append(predicts, segment.segmentID)
|
|
}
|
|
}
|
|
return predicts, len(predicts) > 0
|
|
}
|
|
|
|
func (c *metaCacheImpl) mergeFilters(filters ...SegmentFilter) SegmentFilter {
|
|
return func(info *SegmentInfo) bool {
|
|
for _, filter := range filters {
|
|
if !filter(info) {
|
|
return false
|
|
}
|
|
}
|
|
return true
|
|
}
|
|
}
|