2023-03-27 00:42:00 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package segments
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
"math/rand"
|
|
|
|
"testing"
|
|
|
|
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/stretchr/testify/suite"
|
|
|
|
|
2023-06-09 01:28:37 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
2023-03-27 00:42:00 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/querypb"
|
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
2023-04-20 11:32:31 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
2023-03-27 00:42:00 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
type SegmentLoaderSuite struct {
|
|
|
|
suite.Suite
|
|
|
|
loader Loader
|
|
|
|
|
|
|
|
// Dependencies
|
2023-05-11 15:33:24 +08:00
|
|
|
manager *Manager
|
|
|
|
chunkManager storage.ChunkManager
|
2023-03-27 00:42:00 +08:00
|
|
|
|
|
|
|
// Data
|
|
|
|
collectionID int64
|
|
|
|
partitionID int64
|
|
|
|
segmentID int64
|
|
|
|
schema *schemapb.CollectionSchema
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *SegmentLoaderSuite) SetupSuite() {
|
|
|
|
paramtable.Init()
|
|
|
|
suite.collectionID = rand.Int63()
|
|
|
|
suite.partitionID = rand.Int63()
|
|
|
|
suite.segmentID = rand.Int63()
|
|
|
|
suite.schema = GenTestCollectionSchema("test", schemapb.DataType_Int64)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *SegmentLoaderSuite) SetupTest() {
|
|
|
|
// Dependencies
|
2023-05-11 15:33:24 +08:00
|
|
|
suite.manager = NewManager()
|
2023-03-27 00:42:00 +08:00
|
|
|
suite.chunkManager = storage.NewLocalChunkManager(storage.RootPath(
|
|
|
|
fmt.Sprintf("/tmp/milvus-ut/%d", rand.Int63())))
|
|
|
|
|
2023-05-11 15:33:24 +08:00
|
|
|
suite.loader = NewLoader(suite.manager, suite.chunkManager)
|
2023-03-27 00:42:00 +08:00
|
|
|
|
|
|
|
// Data
|
|
|
|
schema := GenTestCollectionSchema("test", schemapb.DataType_Int64)
|
2023-04-26 10:14:41 +08:00
|
|
|
indexMeta := GenTestIndexMeta(suite.collectionID, schema)
|
2023-03-27 00:42:00 +08:00
|
|
|
loadMeta := &querypb.LoadMetaInfo{
|
|
|
|
LoadType: querypb.LoadType_LoadCollection,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
PartitionIDs: []int64{suite.partitionID},
|
|
|
|
}
|
2023-05-11 15:33:24 +08:00
|
|
|
suite.manager.Collection.Put(suite.collectionID, schema, indexMeta, loadMeta)
|
2023-03-27 00:42:00 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *SegmentLoaderSuite) TestLoad() {
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
// Load sealed
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
suite.segmentID,
|
2023-04-20 11:32:31 +08:00
|
|
|
4,
|
2023-03-27 00:42:00 +08:00
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
_, err = suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: suite.segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
})
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
// Load growing
|
|
|
|
binlogs, statsLogs, err = SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
suite.segmentID+1,
|
2023-04-20 11:32:31 +08:00
|
|
|
4,
|
2023-03-27 00:42:00 +08:00
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
_, err = suite.loader.Load(ctx, suite.collectionID, SegmentTypeGrowing, 0, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: suite.segmentID + 1,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
})
|
|
|
|
suite.NoError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *SegmentLoaderSuite) TestLoadMultipleSegments() {
|
|
|
|
ctx := context.Background()
|
|
|
|
const SegmentNum = 5
|
|
|
|
loadInfos := make([]*querypb.SegmentLoadInfo, 0, SegmentNum)
|
|
|
|
|
|
|
|
// Load sealed
|
|
|
|
for i := 0; i < SegmentNum; i++ {
|
|
|
|
segmentID := suite.segmentID + int64(i)
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
100,
|
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
loadInfos = append(loadInfos, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, loadInfos...)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
// Won't load bloom filter with sealed segments
|
|
|
|
for _, segment := range segments {
|
|
|
|
for pk := 0; pk < 100; pk++ {
|
|
|
|
exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk)))
|
|
|
|
suite.Require().False(exist)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load growing
|
|
|
|
loadInfos = loadInfos[:0]
|
|
|
|
for i := 0; i < SegmentNum; i++ {
|
|
|
|
segmentID := suite.segmentID + SegmentNum + int64(i)
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
100,
|
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
loadInfos = append(loadInfos, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
segments, err = suite.loader.Load(ctx, suite.collectionID, SegmentTypeGrowing, 0, loadInfos...)
|
|
|
|
suite.NoError(err)
|
|
|
|
// Should load bloom filter with growing segments
|
|
|
|
for _, segment := range segments {
|
|
|
|
for pk := 0; pk < 100; pk++ {
|
|
|
|
exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk)))
|
|
|
|
suite.True(exist)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *SegmentLoaderSuite) TestLoadWithIndex() {
|
|
|
|
ctx := context.Background()
|
|
|
|
const SegmentNum = 5
|
|
|
|
loadInfos := make([]*querypb.SegmentLoadInfo, 0, SegmentNum)
|
|
|
|
|
|
|
|
// Load sealed
|
|
|
|
for i := 0; i < SegmentNum; i++ {
|
|
|
|
segmentID := suite.segmentID + int64(i)
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
100,
|
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
2023-04-20 11:32:31 +08:00
|
|
|
vecFields := funcutil.GetVecFieldIDs(suite.schema)
|
2023-03-27 00:42:00 +08:00
|
|
|
indexInfo, err := GenAndSaveIndex(
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
2023-04-20 11:32:31 +08:00
|
|
|
vecFields[0],
|
2023-03-27 00:42:00 +08:00
|
|
|
100,
|
|
|
|
IndexFaissIVFFlat,
|
|
|
|
L2,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
loadInfos = append(loadInfos, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
IndexInfos: []*querypb.FieldIndexInfo{indexInfo},
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, loadInfos...)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
2023-04-20 11:32:31 +08:00
|
|
|
vecFields := funcutil.GetVecFieldIDs(suite.schema)
|
2023-03-27 00:42:00 +08:00
|
|
|
for _, segment := range segments {
|
2023-04-20 11:32:31 +08:00
|
|
|
suite.True(segment.ExistIndex(vecFields[0]))
|
2023-03-27 00:42:00 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *SegmentLoaderSuite) TestLoadBloomFilter() {
|
|
|
|
ctx := context.Background()
|
|
|
|
const SegmentNum = 5
|
|
|
|
loadInfos := make([]*querypb.SegmentLoadInfo, 0, SegmentNum)
|
|
|
|
|
|
|
|
// Load sealed
|
|
|
|
for i := 0; i < SegmentNum; i++ {
|
|
|
|
segmentID := suite.segmentID + int64(i)
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
100,
|
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
loadInfos = append(loadInfos, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
bfs, err := suite.loader.LoadBloomFilterSet(ctx, suite.collectionID, 0, loadInfos...)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
for _, bf := range bfs {
|
|
|
|
for pk := 0; pk < 100; pk++ {
|
|
|
|
exist := bf.MayPkExist(storage.NewInt64PrimaryKey(int64(pk)))
|
|
|
|
suite.Require().True(exist)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *SegmentLoaderSuite) TestLoadDeltaLogs() {
|
|
|
|
ctx := context.Background()
|
|
|
|
const SegmentNum = 5
|
|
|
|
loadInfos := make([]*querypb.SegmentLoadInfo, 0, SegmentNum)
|
|
|
|
|
|
|
|
// Load sealed
|
|
|
|
for i := 0; i < SegmentNum; i++ {
|
|
|
|
segmentID := suite.segmentID + int64(i)
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
100,
|
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
// Delete PKs 1, 2
|
|
|
|
deltaLogs, err := SaveDeltaLog(suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
loadInfos = append(loadInfos, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
Deltalogs: deltaLogs,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeGrowing, 0, loadInfos...)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
for _, segment := range segments {
|
|
|
|
suite.Equal(int64(100-2), segment.RowNum())
|
|
|
|
for pk := 0; pk < 100; pk++ {
|
|
|
|
if pk == 1 || pk == 2 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
exist := segment.MayPkExist(storage.NewInt64PrimaryKey(int64(pk)))
|
|
|
|
suite.Require().True(exist)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-28 21:30:05 +08:00
|
|
|
func (suite *SegmentLoaderSuite) TestLoadWithMmap() {
|
|
|
|
key := paramtable.Get().QueryNodeCfg.MmapDirPath.Key
|
|
|
|
paramtable.Get().Save(key, "/tmp/mmap-test")
|
|
|
|
defer paramtable.Get().Reset(key)
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
// Load sealed
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
suite.segmentID,
|
|
|
|
100,
|
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
_, err = suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: suite.segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
})
|
|
|
|
suite.NoError(err)
|
|
|
|
}
|
|
|
|
|
2023-05-06 14:24:39 +08:00
|
|
|
func (suite *SegmentLoaderSuite) TestPatchEntryNum() {
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
segmentID := suite.segmentID
|
|
|
|
binlogs, statsLogs, err := SaveBinLog(ctx,
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
100,
|
|
|
|
suite.schema,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
vecFields := funcutil.GetVecFieldIDs(suite.schema)
|
|
|
|
indexInfo, err := GenAndSaveIndex(
|
|
|
|
suite.collectionID,
|
|
|
|
suite.partitionID,
|
|
|
|
segmentID,
|
|
|
|
vecFields[0],
|
|
|
|
100,
|
|
|
|
IndexFaissIVFFlat,
|
|
|
|
L2,
|
|
|
|
suite.chunkManager,
|
|
|
|
)
|
|
|
|
suite.NoError(err)
|
|
|
|
loadInfo := &querypb.SegmentLoadInfo{
|
|
|
|
SegmentID: segmentID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
BinlogPaths: binlogs,
|
|
|
|
Statslogs: statsLogs,
|
|
|
|
IndexInfos: []*querypb.FieldIndexInfo{indexInfo},
|
|
|
|
}
|
|
|
|
|
|
|
|
// mock legacy binlog entry num is zero case
|
|
|
|
for _, fieldLog := range binlogs {
|
|
|
|
for _, binlog := range fieldLog.GetBinlogs() {
|
|
|
|
binlog.EntriesNum = 0
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
segments, err := suite.loader.Load(ctx, suite.collectionID, SegmentTypeSealed, 0, loadInfo)
|
|
|
|
suite.Require().NoError(err)
|
|
|
|
suite.Require().Equal(1, len(segments))
|
|
|
|
|
|
|
|
segment := segments[0]
|
|
|
|
info := segment.GetIndex(vecFields[0])
|
|
|
|
suite.Require().NotNil(info)
|
|
|
|
|
|
|
|
for _, binlog := range info.FieldBinlog.GetBinlogs() {
|
|
|
|
suite.Greater(binlog.EntriesNum, int64(0))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-03-27 00:42:00 +08:00
|
|
|
func TestSegmentLoader(t *testing.T) {
|
|
|
|
suite.Run(t, &SegmentLoaderSuite{})
|
|
|
|
}
|