// Licensed to the LF AI & Data foundation under one // or more contributor license agreements. See the NOTICE file // distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance // with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package indexnode import ( "context" "testing" "github.com/stretchr/testify/suite" "github.com/milvus-io/milvus-proto/go-api/v2/commonpb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus/internal/proto/etcdpb" "github.com/milvus-io/milvus/internal/proto/indexpb" "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/util/dependency" "github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/util/metautil" "github.com/milvus-io/milvus/pkg/util/metric" "github.com/milvus-io/milvus/pkg/util/paramtable" "github.com/milvus-io/milvus/pkg/util/timerecord" ) type IndexBuildTaskSuite struct { suite.Suite schema *schemapb.CollectionSchema collectionID int64 partitionID int64 segmentID int64 dataPath string numRows int dim int } func (suite *IndexBuildTaskSuite) SetupSuite() { paramtable.Init() suite.collectionID = 1000 suite.partitionID = 1001 suite.segmentID = 1002 suite.dataPath = "/tmp/milvus/data/1000/1001/1002/3/1" suite.numRows = 100 suite.dim = 128 } func (suite *IndexBuildTaskSuite) SetupTest() { suite.schema = &schemapb.CollectionSchema{ Name: "test", Description: "test", AutoID: false, Fields: []*schemapb.FieldSchema{ {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true}, {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true}, {FieldID: 100, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true}, {FieldID: 101, Name: "ts", DataType: schemapb.DataType_Int64}, {FieldID: 102, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "128"}}}, }, } } func (suite *IndexBuildTaskSuite) serializeData() ([]*storage.Blob, error) { insertCodec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{ Schema: suite.schema, }) return insertCodec.Serialize(suite.partitionID, suite.segmentID, &storage.InsertData{ Data: map[storage.FieldID]storage.FieldData{ 0: &storage.Int64FieldData{Data: generateLongs(suite.numRows)}, 1: &storage.Int64FieldData{Data: generateLongs(suite.numRows)}, 100: &storage.Int64FieldData{Data: generateLongs(suite.numRows)}, 101: &storage.Int64FieldData{Data: generateLongs(suite.numRows)}, 102: &storage.FloatVectorFieldData{Data: generateFloats(suite.numRows * suite.dim), Dim: suite.dim}, }, Infos: []storage.BlobInfo{{Length: suite.numRows}}, }) } func (suite *IndexBuildTaskSuite) TestBuildMemoryIndex() { ctx, cancel := context.WithCancel(context.Background()) req := &indexpb.CreateJobRequest{ BuildID: 1, IndexVersion: 1, DataPaths: []string{suite.dataPath}, IndexID: 0, IndexName: "", IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}}, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "128"}}, NumRows: int64(suite.numRows), StorageConfig: &indexpb.StorageConfig{ RootPath: "/tmp/milvus/data", StorageType: "local", }, CollectionID: 1, PartitionID: 2, SegmentID: 3, FieldID: 102, FieldName: "vec", FieldType: schemapb.DataType_FloatVector, } cm, err := NewChunkMgrFactory().NewChunkManager(ctx, req.GetStorageConfig()) suite.NoError(err) blobs, err := suite.serializeData() suite.NoError(err) err = cm.Write(ctx, suite.dataPath, blobs[0].Value) suite.NoError(err) t := newIndexBuildTask(ctx, cancel, req, cm, NewIndexNode(context.Background(), dependency.NewDefaultFactory(true))) err = t.PreExecute(context.Background()) suite.NoError(err) err = t.Execute(context.Background()) suite.NoError(err) err = t.PostExecute(context.Background()) suite.NoError(err) } func TestIndexBuildTask(t *testing.T) { suite.Run(t, new(IndexBuildTaskSuite)) } type AnalyzeTaskSuite struct { suite.Suite schema *schemapb.CollectionSchema collectionID int64 partitionID int64 segmentID int64 fieldID int64 taskID int64 } func (suite *AnalyzeTaskSuite) SetupSuite() { paramtable.Init() suite.collectionID = 1000 suite.partitionID = 1001 suite.segmentID = 1002 suite.fieldID = 102 suite.taskID = 1004 } func (suite *AnalyzeTaskSuite) SetupTest() { suite.schema = &schemapb.CollectionSchema{ Name: "test", Description: "test", AutoID: false, Fields: []*schemapb.FieldSchema{ {FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true}, {FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true}, {FieldID: 100, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true}, {FieldID: 101, Name: "ts", DataType: schemapb.DataType_Int64}, {FieldID: 102, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}}, }, } } func (suite *AnalyzeTaskSuite) serializeData() ([]*storage.Blob, error) { insertCodec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{ Schema: suite.schema, }) return insertCodec.Serialize(suite.partitionID, suite.segmentID, &storage.InsertData{ Data: map[storage.FieldID]storage.FieldData{ 0: &storage.Int64FieldData{Data: []int64{0, 1, 2}}, 1: &storage.Int64FieldData{Data: []int64{1, 2, 3}}, 100: &storage.Int64FieldData{Data: []int64{0, 1, 2}}, 101: &storage.Int64FieldData{Data: []int64{0, 1, 2}}, 102: &storage.FloatVectorFieldData{Data: []float32{1, 2, 3}, Dim: 1}, }, Infos: []storage.BlobInfo{{Length: 3}}, }) } func (suite *AnalyzeTaskSuite) TestAnalyze() { ctx, cancel := context.WithCancel(context.Background()) req := &indexpb.AnalyzeRequest{ ClusterID: "test", TaskID: 1, CollectionID: suite.collectionID, PartitionID: suite.partitionID, FieldID: suite.fieldID, FieldName: "vec", FieldType: schemapb.DataType_FloatVector, SegmentStats: map[int64]*indexpb.SegmentStats{ suite.segmentID: { ID: suite.segmentID, NumRows: 1024, LogIDs: []int64{1}, }, }, Version: 1, StorageConfig: &indexpb.StorageConfig{ RootPath: "/tmp/milvus/data", StorageType: "local", }, Dim: 1, } cm, err := NewChunkMgrFactory().NewChunkManager(ctx, req.GetStorageConfig()) suite.NoError(err) blobs, err := suite.serializeData() suite.NoError(err) dataPath := metautil.BuildInsertLogPath(cm.RootPath(), suite.collectionID, suite.partitionID, suite.segmentID, suite.fieldID, 1) err = cm.Write(ctx, dataPath, blobs[0].Value) suite.NoError(err) t := &analyzeTask{ ident: "", cancel: cancel, ctx: ctx, req: req, tr: timerecord.NewTimeRecorder("test-indexBuildTask"), queueDur: 0, node: NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)), } err = t.PreExecute(context.Background()) suite.NoError(err) } func TestAnalyzeTaskSuite(t *testing.T) { suite.Run(t, new(AnalyzeTaskSuite)) }