2023-10-23 19:42:10 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package metacache
|
|
|
|
|
|
|
|
import (
|
|
|
|
"testing"
|
|
|
|
|
|
|
|
"github.com/samber/lo"
|
|
|
|
"github.com/stretchr/testify/suite"
|
|
|
|
|
2023-10-28 11:12:11 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
2023-11-14 15:08:19 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
2024-08-22 19:42:57 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/flushcommon/metacache/pkoracle"
|
2023-10-23 19:42:10 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
2023-11-14 15:08:19 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
|
|
|
"github.com/milvus-io/milvus/pkg/common"
|
2024-01-19 18:52:53 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
2023-10-23 19:42:10 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
type MetaCacheSuite struct {
|
|
|
|
suite.Suite
|
|
|
|
|
|
|
|
collectionID int64
|
2023-11-14 15:08:19 +08:00
|
|
|
collSchema *schemapb.CollectionSchema
|
2023-10-23 19:42:10 +08:00
|
|
|
vchannel string
|
|
|
|
invaliedSeg int64
|
|
|
|
partitionIDs []int64
|
|
|
|
flushedSegments []int64
|
|
|
|
growingSegments []int64
|
|
|
|
newSegments []int64
|
|
|
|
cache MetaCache
|
2023-10-28 11:12:11 +08:00
|
|
|
|
|
|
|
bfsFactory PkStatsFactory
|
2023-10-23 19:42:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MetaCacheSuite) SetupSuite() {
|
2024-01-19 18:52:53 +08:00
|
|
|
paramtable.Init()
|
|
|
|
|
2023-10-23 19:42:10 +08:00
|
|
|
s.collectionID = 1
|
|
|
|
s.vchannel = "test"
|
|
|
|
s.partitionIDs = []int64{1, 2, 3, 4}
|
|
|
|
s.flushedSegments = []int64{1, 2, 3, 4}
|
|
|
|
s.growingSegments = []int64{5, 6, 7, 8}
|
|
|
|
s.newSegments = []int64{9, 10, 11, 12}
|
|
|
|
s.invaliedSeg = 111
|
2024-08-22 19:42:57 +08:00
|
|
|
s.bfsFactory = func(*datapb.SegmentInfo) pkoracle.PkStat {
|
|
|
|
return pkoracle.NewBloomFilterSet()
|
2023-10-28 11:12:11 +08:00
|
|
|
}
|
2023-11-14 15:08:19 +08:00
|
|
|
s.collSchema = &schemapb.CollectionSchema{
|
|
|
|
Name: "test_collection",
|
|
|
|
Fields: []*schemapb.FieldSchema{
|
|
|
|
{FieldID: 100, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, Name: "pk"},
|
|
|
|
{FieldID: 101, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{
|
|
|
|
{Key: common.DimKey, Value: "128"},
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
}
|
2023-10-23 19:42:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MetaCacheSuite) SetupTest() {
|
|
|
|
flushSegmentInfos := lo.RepeatBy(len(s.flushedSegments), func(i int) *datapb.SegmentInfo {
|
|
|
|
return &datapb.SegmentInfo{
|
|
|
|
ID: s.flushedSegments[i],
|
|
|
|
PartitionID: s.partitionIDs[i],
|
2023-10-28 11:12:11 +08:00
|
|
|
State: commonpb.SegmentState_Flushed,
|
2023-10-23 19:42:10 +08:00
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
growingSegmentInfos := lo.RepeatBy(len(s.growingSegments), func(i int) *datapb.SegmentInfo {
|
|
|
|
return &datapb.SegmentInfo{
|
|
|
|
ID: s.growingSegments[i],
|
|
|
|
PartitionID: s.partitionIDs[i],
|
2023-10-28 11:12:11 +08:00
|
|
|
State: commonpb.SegmentState_Growing,
|
2023-10-23 19:42:10 +08:00
|
|
|
}
|
|
|
|
})
|
|
|
|
|
2023-11-14 15:08:19 +08:00
|
|
|
s.cache = NewMetaCache(&datapb.ChannelWatchInfo{
|
|
|
|
Schema: s.collSchema,
|
|
|
|
Vchan: &datapb.VchannelInfo{
|
|
|
|
CollectionID: s.collectionID,
|
|
|
|
ChannelName: s.vchannel,
|
|
|
|
FlushedSegments: flushSegmentInfos,
|
|
|
|
UnflushedSegments: growingSegmentInfos,
|
|
|
|
},
|
2023-10-28 11:12:11 +08:00
|
|
|
}, s.bfsFactory)
|
2023-10-23 19:42:10 +08:00
|
|
|
}
|
|
|
|
|
2023-11-14 15:08:19 +08:00
|
|
|
func (s *MetaCacheSuite) TestMetaInfo() {
|
|
|
|
s.Equal(s.collectionID, s.cache.Collection())
|
|
|
|
s.Equal(s.collSchema, s.cache.Schema())
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MetaCacheSuite) TestAddSegment() {
|
|
|
|
testSegs := []int64{100, 101, 102}
|
|
|
|
for _, segID := range testSegs {
|
|
|
|
info := &datapb.SegmentInfo{
|
|
|
|
ID: segID,
|
|
|
|
PartitionID: 10,
|
|
|
|
}
|
2024-08-22 19:42:57 +08:00
|
|
|
s.cache.AddSegment(info, func(info *datapb.SegmentInfo) pkoracle.PkStat {
|
|
|
|
return pkoracle.NewBloomFilterSet()
|
2023-11-14 15:08:19 +08:00
|
|
|
}, UpdateState(commonpb.SegmentState_Flushed))
|
|
|
|
}
|
|
|
|
|
|
|
|
segments := s.cache.GetSegmentsBy(WithSegmentIDs(testSegs...))
|
|
|
|
s.Require().Equal(3, len(segments))
|
|
|
|
for _, seg := range segments {
|
|
|
|
s.Equal(commonpb.SegmentState_Flushed, seg.State())
|
|
|
|
s.EqualValues(10, seg.partitionID)
|
|
|
|
|
|
|
|
seg, ok := s.cache.GetSegmentByID(seg.segmentID, WithSegmentState(commonpb.SegmentState_Flushed))
|
|
|
|
s.NotNil(seg)
|
|
|
|
s.True(ok)
|
|
|
|
seg, ok = s.cache.GetSegmentByID(seg.segmentID, WithSegmentState(commonpb.SegmentState_Growing))
|
|
|
|
s.Nil(seg)
|
|
|
|
s.False(ok)
|
|
|
|
}
|
|
|
|
|
|
|
|
gotSegIDs := lo.Map(segments, func(info *SegmentInfo, _ int) int64 {
|
|
|
|
return info.segmentID
|
|
|
|
})
|
|
|
|
|
|
|
|
s.ElementsMatch(testSegs, gotSegIDs)
|
|
|
|
}
|
|
|
|
|
2023-10-28 11:12:11 +08:00
|
|
|
func (s *MetaCacheSuite) TestUpdateSegments() {
|
2023-11-04 12:10:17 +08:00
|
|
|
s.cache.UpdateSegments(UpdateState(commonpb.SegmentState_Flushed), WithSegmentIDs(5))
|
|
|
|
segments := s.cache.GetSegmentsBy(WithSegmentIDs(5))
|
2023-10-28 11:12:11 +08:00
|
|
|
s.Require().Equal(1, len(segments))
|
|
|
|
segment := segments[0]
|
|
|
|
s.Equal(commonpb.SegmentState_Flushed, segment.State())
|
|
|
|
}
|
|
|
|
|
2023-11-24 15:38:25 +08:00
|
|
|
func (s *MetaCacheSuite) TestRemoveSegments() {
|
|
|
|
ids := s.cache.RemoveSegments()
|
|
|
|
s.Empty(ids, "remove without filter shall not succeed")
|
|
|
|
|
|
|
|
ids = s.cache.RemoveSegments(WithSegmentIDs(s.flushedSegments...))
|
|
|
|
s.ElementsMatch(s.flushedSegments, ids)
|
|
|
|
|
|
|
|
for _, segID := range s.flushedSegments {
|
|
|
|
_, ok := s.cache.GetSegmentByID(segID)
|
|
|
|
s.False(ok)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-11-14 15:08:19 +08:00
|
|
|
func (s *MetaCacheSuite) TestPredictSegments() {
|
|
|
|
pk := storage.NewInt64PrimaryKey(100)
|
|
|
|
predict, ok := s.cache.PredictSegments(pk)
|
|
|
|
s.False(ok)
|
|
|
|
s.Empty(predict)
|
|
|
|
|
|
|
|
pkFieldData := &storage.Int64FieldData{
|
|
|
|
Data: []int64{1, 2, 3, 4, 5, 6, 7},
|
|
|
|
}
|
|
|
|
info, got := s.cache.GetSegmentByID(1)
|
|
|
|
s.Require().True(got)
|
|
|
|
s.Require().NotNil(info)
|
|
|
|
err := info.GetBloomFilterSet().UpdatePKRange(pkFieldData)
|
|
|
|
s.Require().NoError(err)
|
|
|
|
|
2024-03-11 10:55:02 +08:00
|
|
|
predict, ok = s.cache.PredictSegments(pk, SegmentFilterFunc(func(s *SegmentInfo) bool {
|
2023-11-14 15:08:19 +08:00
|
|
|
return s.segmentID == 1
|
2024-03-11 10:55:02 +08:00
|
|
|
}))
|
2023-11-14 15:08:19 +08:00
|
|
|
s.False(ok)
|
|
|
|
s.Empty(predict)
|
|
|
|
|
|
|
|
predict, ok = s.cache.PredictSegments(
|
|
|
|
storage.NewInt64PrimaryKey(5),
|
2024-03-11 10:55:02 +08:00
|
|
|
SegmentFilterFunc(func(s *SegmentInfo) bool {
|
2023-11-14 15:08:19 +08:00
|
|
|
return s.segmentID == 1
|
2024-03-11 10:55:02 +08:00
|
|
|
}))
|
2023-11-14 15:08:19 +08:00
|
|
|
s.True(ok)
|
|
|
|
s.NotEmpty(predict)
|
|
|
|
s.Equal(1, len(predict))
|
|
|
|
s.EqualValues(1, predict[0])
|
|
|
|
}
|
|
|
|
|
2024-05-30 13:37:44 +08:00
|
|
|
func (s *MetaCacheSuite) Test_DetectMissingSegments() {
|
|
|
|
segments := map[int64]struct{}{
|
|
|
|
1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 9: {}, 10: {},
|
|
|
|
}
|
|
|
|
|
|
|
|
missingSegments := s.cache.DetectMissingSegments(segments)
|
|
|
|
s.ElementsMatch(missingSegments, []int64{9, 10})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *MetaCacheSuite) Test_UpdateSegmentView() {
|
|
|
|
addSegments := []*datapb.SyncSegmentInfo{
|
|
|
|
{
|
|
|
|
SegmentId: 100,
|
|
|
|
PkStatsLog: nil,
|
|
|
|
State: commonpb.SegmentState_Flushed,
|
|
|
|
Level: datapb.SegmentLevel_L1,
|
|
|
|
NumOfRows: 10240,
|
|
|
|
},
|
|
|
|
}
|
2024-08-22 19:42:57 +08:00
|
|
|
addSegmentsBF := []*pkoracle.BloomFilterSet{
|
|
|
|
pkoracle.NewBloomFilterSet(),
|
2024-05-30 13:37:44 +08:00
|
|
|
}
|
|
|
|
segments := map[int64]struct{}{
|
|
|
|
1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 100: {},
|
|
|
|
}
|
|
|
|
|
|
|
|
s.cache.UpdateSegmentView(1, addSegments, addSegmentsBF, segments)
|
|
|
|
|
|
|
|
addSegments = []*datapb.SyncSegmentInfo{
|
|
|
|
{
|
|
|
|
SegmentId: 101,
|
|
|
|
PkStatsLog: nil,
|
|
|
|
State: commonpb.SegmentState_Flushed,
|
|
|
|
Level: datapb.SegmentLevel_L1,
|
|
|
|
NumOfRows: 10240,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
segments = map[int64]struct{}{
|
|
|
|
1: {}, 2: {}, 3: {}, 4: {}, 5: {}, 6: {}, 7: {}, 8: {}, 101: {},
|
|
|
|
}
|
|
|
|
s.cache.UpdateSegmentView(1, addSegments, addSegmentsBF, segments)
|
|
|
|
}
|
|
|
|
|
2023-10-23 19:42:10 +08:00
|
|
|
func TestMetaCacheSuite(t *testing.T) {
|
|
|
|
suite.Run(t, new(MetaCacheSuite))
|
|
|
|
}
|
2024-03-11 10:55:02 +08:00
|
|
|
|
|
|
|
func BenchmarkGetSegmentsBy(b *testing.B) {
|
|
|
|
paramtable.Init()
|
|
|
|
schema := &schemapb.CollectionSchema{
|
|
|
|
Name: "test_collection",
|
|
|
|
Fields: []*schemapb.FieldSchema{
|
|
|
|
{FieldID: 100, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, Name: "pk"},
|
|
|
|
{FieldID: 101, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{
|
|
|
|
{Key: common.DimKey, Value: "128"},
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
flushSegmentInfos := lo.RepeatBy(10000, func(i int) *datapb.SegmentInfo {
|
|
|
|
return &datapb.SegmentInfo{
|
|
|
|
ID: int64(i),
|
|
|
|
State: commonpb.SegmentState_Flushed,
|
|
|
|
}
|
|
|
|
})
|
|
|
|
cache := NewMetaCache(&datapb.ChannelWatchInfo{
|
|
|
|
Schema: schema,
|
|
|
|
Vchan: &datapb.VchannelInfo{
|
|
|
|
FlushedSegments: flushSegmentInfos,
|
|
|
|
},
|
2024-08-22 19:42:57 +08:00
|
|
|
}, func(*datapb.SegmentInfo) pkoracle.PkStat {
|
|
|
|
return pkoracle.NewBloomFilterSet()
|
2024-03-11 10:55:02 +08:00
|
|
|
})
|
|
|
|
b.ResetTimer()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
filter := WithSegmentIDs(0)
|
|
|
|
cache.GetSegmentsBy(filter)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func BenchmarkGetSegmentsByWithoutIDs(b *testing.B) {
|
|
|
|
paramtable.Init()
|
|
|
|
schema := &schemapb.CollectionSchema{
|
|
|
|
Name: "test_collection",
|
|
|
|
Fields: []*schemapb.FieldSchema{
|
|
|
|
{FieldID: 100, DataType: schemapb.DataType_Int64, IsPrimaryKey: true, Name: "pk"},
|
|
|
|
{FieldID: 101, DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{
|
|
|
|
{Key: common.DimKey, Value: "128"},
|
|
|
|
}},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
flushSegmentInfos := lo.RepeatBy(10000, func(i int) *datapb.SegmentInfo {
|
|
|
|
return &datapb.SegmentInfo{
|
|
|
|
ID: int64(i),
|
|
|
|
State: commonpb.SegmentState_Flushed,
|
|
|
|
}
|
|
|
|
})
|
|
|
|
cache := NewMetaCache(&datapb.ChannelWatchInfo{
|
|
|
|
Schema: schema,
|
|
|
|
Vchan: &datapb.VchannelInfo{
|
|
|
|
FlushedSegments: flushSegmentInfos,
|
|
|
|
},
|
2024-08-22 19:42:57 +08:00
|
|
|
}, func(*datapb.SegmentInfo) pkoracle.PkStat {
|
|
|
|
return pkoracle.NewBloomFilterSet()
|
2024-03-11 10:55:02 +08:00
|
|
|
})
|
|
|
|
b.ResetTimer()
|
|
|
|
for i := 0; i < b.N; i++ {
|
|
|
|
// use old func filter
|
|
|
|
filter := SegmentFilterFunc(func(info *SegmentInfo) bool {
|
|
|
|
return info.segmentID == 0
|
|
|
|
})
|
|
|
|
cache.GetSegmentsBy(filter)
|
|
|
|
}
|
|
|
|
}
|