2022-05-23 15:47:58 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package indexnode
|
|
|
|
|
2023-12-13 17:24:38 +08:00
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"testing"
|
|
|
|
|
|
|
|
"github.com/apache/arrow/go/v12/arrow"
|
|
|
|
"github.com/apache/arrow/go/v12/arrow/array"
|
|
|
|
"github.com/apache/arrow/go/v12/arrow/memory"
|
|
|
|
"github.com/stretchr/testify/suite"
|
|
|
|
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
|
|
|
milvus_storage "github.com/milvus-io/milvus-storage/go/storage"
|
|
|
|
"github.com/milvus-io/milvus-storage/go/storage/options"
|
|
|
|
"github.com/milvus-io/milvus-storage/go/storage/schema"
|
2024-06-06 17:37:51 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/etcdpb"
|
2023-12-13 17:24:38 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
2024-06-06 17:37:51 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
2023-12-13 17:24:38 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/dependency"
|
|
|
|
"github.com/milvus-io/milvus/internal/util/typeutil"
|
|
|
|
"github.com/milvus-io/milvus/pkg/common"
|
2024-06-06 17:37:51 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/metautil"
|
2023-12-13 17:24:38 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/metric"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
2024-06-06 17:37:51 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
2023-12-13 17:24:38 +08:00
|
|
|
)
|
|
|
|
|
2024-06-06 17:37:51 +08:00
|
|
|
// IndexBuildTaskSuite exercises the in-memory index build task lifecycle
// (PreExecute/Execute/PostExecute) against insert data written through a
// local chunk manager.
type IndexBuildTaskSuite struct {
	suite.Suite
	// schema is the collection schema used to serialize the test segment.
	schema *schemapb.CollectionSchema
	// Fixed identifiers of the synthetic collection/partition/segment.
	collectionID int64
	partitionID  int64
	segmentID    int64
	// dataPath is the chunk-manager key the serialized insert blob is written to.
	dataPath string

	// numRows and dim describe the generated float-vector payload.
	numRows int
	dim     int
}
|
|
|
|
|
|
|
|
func (suite *IndexBuildTaskSuite) SetupSuite() {
|
|
|
|
paramtable.Init()
|
|
|
|
suite.collectionID = 1000
|
|
|
|
suite.partitionID = 1001
|
|
|
|
suite.segmentID = 1002
|
|
|
|
suite.dataPath = "/tmp/milvus/data/1000/1001/1002/3/1"
|
|
|
|
suite.numRows = 100
|
|
|
|
suite.dim = 128
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *IndexBuildTaskSuite) SetupTest() {
|
|
|
|
suite.schema = &schemapb.CollectionSchema{
|
|
|
|
Name: "test",
|
|
|
|
Description: "test",
|
|
|
|
AutoID: false,
|
|
|
|
Fields: []*schemapb.FieldSchema{
|
|
|
|
{FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
|
|
|
{FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
|
|
|
{FieldID: 100, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
|
|
|
{FieldID: 101, Name: "ts", DataType: schemapb.DataType_Int64},
|
|
|
|
{FieldID: 102, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "128"}}},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *IndexBuildTaskSuite) serializeData() ([]*storage.Blob, error) {
|
|
|
|
insertCodec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{
|
|
|
|
Schema: suite.schema,
|
|
|
|
})
|
|
|
|
return insertCodec.Serialize(suite.partitionID, suite.segmentID, &storage.InsertData{
|
|
|
|
Data: map[storage.FieldID]storage.FieldData{
|
|
|
|
0: &storage.Int64FieldData{Data: generateLongs(suite.numRows)},
|
|
|
|
1: &storage.Int64FieldData{Data: generateLongs(suite.numRows)},
|
|
|
|
100: &storage.Int64FieldData{Data: generateLongs(suite.numRows)},
|
|
|
|
101: &storage.Int64FieldData{Data: generateLongs(suite.numRows)},
|
|
|
|
102: &storage.FloatVectorFieldData{Data: generateFloats(suite.numRows * suite.dim), Dim: suite.dim},
|
|
|
|
},
|
|
|
|
Infos: []storage.BlobInfo{{Length: suite.numRows}},
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *IndexBuildTaskSuite) TestBuildMemoryIndex() {
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
req := &indexpb.CreateJobRequest{
|
|
|
|
BuildID: 1,
|
|
|
|
IndexVersion: 1,
|
|
|
|
DataPaths: []string{suite.dataPath},
|
|
|
|
IndexID: 0,
|
|
|
|
IndexName: "",
|
|
|
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}},
|
|
|
|
TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "128"}},
|
|
|
|
NumRows: int64(suite.numRows),
|
|
|
|
StorageConfig: &indexpb.StorageConfig{
|
|
|
|
RootPath: "/tmp/milvus/data",
|
|
|
|
StorageType: "local",
|
|
|
|
},
|
|
|
|
CollectionID: 1,
|
|
|
|
PartitionID: 2,
|
|
|
|
SegmentID: 3,
|
|
|
|
FieldID: 102,
|
|
|
|
FieldName: "vec",
|
|
|
|
FieldType: schemapb.DataType_FloatVector,
|
|
|
|
}
|
|
|
|
|
|
|
|
cm, err := NewChunkMgrFactory().NewChunkManager(ctx, req.GetStorageConfig())
|
|
|
|
suite.NoError(err)
|
|
|
|
blobs, err := suite.serializeData()
|
|
|
|
suite.NoError(err)
|
|
|
|
err = cm.Write(ctx, suite.dataPath, blobs[0].Value)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
t := newIndexBuildTask(ctx, cancel, req, cm, NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)))
|
|
|
|
|
|
|
|
err = t.PreExecute(context.Background())
|
|
|
|
suite.NoError(err)
|
|
|
|
err = t.Execute(context.Background())
|
|
|
|
suite.NoError(err)
|
|
|
|
err = t.PostExecute(context.Background())
|
|
|
|
suite.NoError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TestIndexBuildTask runs IndexBuildTaskSuite via testify's suite runner.
func TestIndexBuildTask(t *testing.T) {
	suite.Run(t, new(IndexBuildTaskSuite))
}
|
|
|
|
|
2023-12-13 17:24:38 +08:00
|
|
|
// IndexBuildTaskV2Suite exercises the storage-v2 index build task against a
// milvus-storage space populated with a single record.
type IndexBuildTaskV2Suite struct {
	suite.Suite
	// schema is the collection schema; arrowSchema is its Arrow conversion
	// used to build records and open the space.
	schema      *schemapb.CollectionSchema
	arrowSchema *arrow.Schema
	// space is the milvus-storage space the task reads from and writes
	// index data to.
	space *milvus_storage.Space
}
|
|
|
|
|
|
|
|
// SetupSuite initializes global params once for all tests in the suite.
func (suite *IndexBuildTaskV2Suite) SetupSuite() {
	paramtable.Init()
}
|
|
|
|
|
|
|
|
func (suite *IndexBuildTaskV2Suite) SetupTest() {
|
|
|
|
suite.schema = &schemapb.CollectionSchema{
|
|
|
|
Name: "test",
|
|
|
|
Description: "test",
|
|
|
|
AutoID: false,
|
|
|
|
Fields: []*schemapb.FieldSchema{
|
|
|
|
{FieldID: 1, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
|
|
|
{FieldID: 2, Name: "ts", DataType: schemapb.DataType_Int64},
|
|
|
|
{FieldID: 3, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
|
|
|
|
var err error
|
|
|
|
suite.arrowSchema, err = typeutil.ConvertToArrowSchema(suite.schema.Fields)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
tmpDir := suite.T().TempDir()
|
|
|
|
opt := options.NewSpaceOptionBuilder().
|
|
|
|
SetSchema(schema.NewSchema(
|
|
|
|
suite.arrowSchema,
|
|
|
|
&schema.SchemaOptions{
|
|
|
|
PrimaryColumn: "pk",
|
|
|
|
VectorColumn: "vec",
|
|
|
|
VersionColumn: "ts",
|
|
|
|
})).
|
|
|
|
Build()
|
|
|
|
suite.space, err = milvus_storage.Open("file://"+tmpDir, opt)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
b := array.NewRecordBuilder(memory.DefaultAllocator, suite.arrowSchema)
|
|
|
|
defer b.Release()
|
|
|
|
b.Field(0).(*array.Int64Builder).AppendValues([]int64{1}, nil)
|
|
|
|
b.Field(1).(*array.Int64Builder).AppendValues([]int64{1}, nil)
|
|
|
|
fb := b.Field(2).(*array.FixedSizeBinaryBuilder)
|
|
|
|
fb.Reserve(1)
|
|
|
|
fb.Append([]byte{1, 2, 3, 4})
|
|
|
|
|
|
|
|
rec := b.NewRecord()
|
|
|
|
defer rec.Release()
|
|
|
|
reader, err := array.NewRecordReader(suite.arrowSchema, []arrow.Record{rec})
|
|
|
|
suite.NoError(err)
|
|
|
|
err = suite.space.Write(reader, &options.DefaultWriteOptions)
|
|
|
|
suite.NoError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *IndexBuildTaskV2Suite) TestBuildIndex() {
|
|
|
|
req := &indexpb.CreateJobRequest{
|
|
|
|
BuildID: 1,
|
|
|
|
IndexVersion: 1,
|
|
|
|
IndexID: 0,
|
|
|
|
IndexName: "",
|
|
|
|
IndexParams: []*commonpb.KeyValuePair{{Key: common.IndexTypeKey, Value: "FLAT"}, {Key: common.MetricTypeKey, Value: metric.L2}, {Key: common.DimKey, Value: "1"}},
|
|
|
|
TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}},
|
|
|
|
NumRows: 10,
|
|
|
|
StorageConfig: &indexpb.StorageConfig{
|
|
|
|
RootPath: "/tmp/milvus/data",
|
|
|
|
StorageType: "local",
|
|
|
|
},
|
2024-06-05 19:35:52 +08:00
|
|
|
CollectionID: 1,
|
|
|
|
PartitionID: 1,
|
|
|
|
SegmentID: 1,
|
|
|
|
FieldID: 3,
|
|
|
|
FieldName: "vec",
|
|
|
|
FieldType: schemapb.DataType_FloatVector,
|
2023-12-13 17:24:38 +08:00
|
|
|
StorePath: "file://" + suite.space.Path(),
|
|
|
|
StoreVersion: suite.space.GetCurrentVersion(),
|
|
|
|
IndexStorePath: "file://" + suite.space.Path(),
|
|
|
|
Dim: 4,
|
2024-01-24 00:04:55 +08:00
|
|
|
OptionalScalarFields: []*indexpb.OptionalFieldInfo{
|
|
|
|
{FieldID: 1, FieldName: "pk", FieldType: 5, DataIds: []int64{0}},
|
|
|
|
},
|
2023-12-13 17:24:38 +08:00
|
|
|
}
|
|
|
|
|
2024-06-05 19:35:52 +08:00
|
|
|
task := newIndexBuildTaskV2(context.Background(), nil, req, NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)))
|
2023-12-13 17:24:38 +08:00
|
|
|
|
|
|
|
var err error
|
2024-06-06 17:37:51 +08:00
|
|
|
err = task.PreExecute(context.Background())
|
2023-12-13 17:24:38 +08:00
|
|
|
suite.NoError(err)
|
2024-06-06 17:37:51 +08:00
|
|
|
err = task.Execute(context.Background())
|
2023-12-13 17:24:38 +08:00
|
|
|
suite.NoError(err)
|
2024-06-06 17:37:51 +08:00
|
|
|
err = task.PostExecute(context.Background())
|
2023-12-13 17:24:38 +08:00
|
|
|
suite.NoError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TestIndexBuildTaskV2Suite runs IndexBuildTaskV2Suite via testify's suite runner.
func TestIndexBuildTaskV2Suite(t *testing.T) {
	suite.Run(t, new(IndexBuildTaskV2Suite))
}
|
2024-06-06 17:37:51 +08:00
|
|
|
|
|
|
|
// AnalyzeTaskSuite exercises the analyze task's PreExecute stage against
// insert logs written through a local chunk manager.
type AnalyzeTaskSuite struct {
	suite.Suite
	// schema is the collection schema used to serialize the test segment.
	schema *schemapb.CollectionSchema
	// Fixed identifiers of the synthetic collection/partition/segment,
	// the analyzed field, and the analyze task itself.
	collectionID int64
	partitionID  int64
	segmentID    int64
	fieldID      int64
	taskID       int64
}
|
|
|
|
|
|
|
|
func (suite *AnalyzeTaskSuite) SetupSuite() {
|
|
|
|
paramtable.Init()
|
|
|
|
suite.collectionID = 1000
|
|
|
|
suite.partitionID = 1001
|
|
|
|
suite.segmentID = 1002
|
|
|
|
suite.fieldID = 102
|
|
|
|
suite.taskID = 1004
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *AnalyzeTaskSuite) SetupTest() {
|
|
|
|
suite.schema = &schemapb.CollectionSchema{
|
|
|
|
Name: "test",
|
|
|
|
Description: "test",
|
|
|
|
AutoID: false,
|
|
|
|
Fields: []*schemapb.FieldSchema{
|
|
|
|
{FieldID: common.RowIDField, Name: common.RowIDFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
|
|
|
{FieldID: common.TimeStampField, Name: common.TimeStampFieldName, DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
|
|
|
{FieldID: 100, Name: "pk", DataType: schemapb.DataType_Int64, IsPrimaryKey: true},
|
|
|
|
{FieldID: 101, Name: "ts", DataType: schemapb.DataType_Int64},
|
|
|
|
{FieldID: 102, Name: "vec", DataType: schemapb.DataType_FloatVector, TypeParams: []*commonpb.KeyValuePair{{Key: "dim", Value: "1"}}},
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *AnalyzeTaskSuite) serializeData() ([]*storage.Blob, error) {
|
|
|
|
insertCodec := storage.NewInsertCodecWithSchema(&etcdpb.CollectionMeta{
|
|
|
|
Schema: suite.schema,
|
|
|
|
})
|
|
|
|
return insertCodec.Serialize(suite.partitionID, suite.segmentID, &storage.InsertData{
|
|
|
|
Data: map[storage.FieldID]storage.FieldData{
|
|
|
|
0: &storage.Int64FieldData{Data: []int64{0, 1, 2}},
|
|
|
|
1: &storage.Int64FieldData{Data: []int64{1, 2, 3}},
|
|
|
|
100: &storage.Int64FieldData{Data: []int64{0, 1, 2}},
|
|
|
|
101: &storage.Int64FieldData{Data: []int64{0, 1, 2}},
|
|
|
|
102: &storage.FloatVectorFieldData{Data: []float32{1, 2, 3}, Dim: 1},
|
|
|
|
},
|
|
|
|
Infos: []storage.BlobInfo{{Length: 3}},
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (suite *AnalyzeTaskSuite) TestAnalyze() {
|
|
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
|
|
req := &indexpb.AnalyzeRequest{
|
|
|
|
ClusterID: "test",
|
|
|
|
TaskID: 1,
|
|
|
|
CollectionID: suite.collectionID,
|
|
|
|
PartitionID: suite.partitionID,
|
|
|
|
FieldID: suite.fieldID,
|
|
|
|
FieldName: "vec",
|
|
|
|
FieldType: schemapb.DataType_FloatVector,
|
|
|
|
SegmentStats: map[int64]*indexpb.SegmentStats{
|
|
|
|
suite.segmentID: {
|
|
|
|
ID: suite.segmentID,
|
|
|
|
NumRows: 1024,
|
|
|
|
LogIDs: []int64{1},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
Version: 1,
|
|
|
|
StorageConfig: &indexpb.StorageConfig{
|
|
|
|
RootPath: "/tmp/milvus/data",
|
|
|
|
StorageType: "local",
|
|
|
|
},
|
|
|
|
Dim: 1,
|
|
|
|
}
|
|
|
|
|
|
|
|
cm, err := NewChunkMgrFactory().NewChunkManager(ctx, req.GetStorageConfig())
|
|
|
|
suite.NoError(err)
|
|
|
|
blobs, err := suite.serializeData()
|
|
|
|
suite.NoError(err)
|
|
|
|
dataPath := metautil.BuildInsertLogPath(cm.RootPath(), suite.collectionID, suite.partitionID, suite.segmentID,
|
|
|
|
suite.fieldID, 1)
|
|
|
|
|
|
|
|
err = cm.Write(ctx, dataPath, blobs[0].Value)
|
|
|
|
suite.NoError(err)
|
|
|
|
|
|
|
|
t := &analyzeTask{
|
|
|
|
ident: "",
|
|
|
|
cancel: cancel,
|
|
|
|
ctx: ctx,
|
|
|
|
req: req,
|
|
|
|
tr: timerecord.NewTimeRecorder("test-indexBuildTask"),
|
|
|
|
queueDur: 0,
|
|
|
|
node: NewIndexNode(context.Background(), dependency.NewDefaultFactory(true)),
|
|
|
|
}
|
|
|
|
|
|
|
|
err = t.PreExecute(context.Background())
|
|
|
|
suite.NoError(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
// TestAnalyzeTaskSuite runs AnalyzeTaskSuite via testify's suite runner.
func TestAnalyzeTaskSuite(t *testing.T) {
	suite.Run(t, new(AnalyzeTaskSuite))
}
|