// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package importv2

import (
	"context"
	"encoding/json"
	"io"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/cockroachdb/errors"
	"github.com/stretchr/testify/mock"
	"github.com/stretchr/testify/suite"

	"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
	"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
	"github.com/milvus-io/milvus/internal/flushcommon/syncmgr"
	"github.com/milvus-io/milvus/internal/mocks"
	"github.com/milvus-io/milvus/internal/proto/datapb"
	"github.com/milvus-io/milvus/internal/proto/internalpb"
	"github.com/milvus-io/milvus/internal/storage"
	"github.com/milvus-io/milvus/internal/util/importutilv2"
	"github.com/milvus-io/milvus/internal/util/testutil"
	"github.com/milvus-io/milvus/pkg/common"
	"github.com/milvus-io/milvus/pkg/util/conc"
	"github.com/milvus-io/milvus/pkg/util/paramtable"
)

// sampleRow mirrors the JSON row layout for the test collection schema (pk, int64, vec).
type sampleRow struct {
	FieldString      string    `json:"pk,omitempty"`
	FieldInt64       int64     `json:"int64,omitempty"`
	FieldFloatVector []float32 `json:"vec,omitempty"`
}

type sampleContent struct {
	Rows []sampleRow `json:"rows,omitempty"`
}

// mockReader embeds the io interfaces needed to satisfy the reader type returned by the mocked ChunkManager.
type mockReader struct {
	io.Reader
	io.Closer
	io.ReaderAt
	io.Seeker
}

type SchedulerSuite struct {
	suite.Suite

	numRows int
	schema  *schemapb.CollectionSchema

	cm        storage.ChunkManager
	reader    *importutilv2.MockReader
	syncMgr   *syncmgr.MockSyncManager
	manager   TaskManager
	scheduler *scheduler
}

func (s *SchedulerSuite) SetupSuite() {
	paramtable.Init()
}

func (s *SchedulerSuite) SetupTest() {
	s.numRows = 100
	s.schema = &schemapb.CollectionSchema{
		Fields: []*schemapb.FieldSchema{
			{
				FieldID:      100,
				Name:         "pk",
				IsPrimaryKey: true,
				DataType:     schemapb.DataType_VarChar,
				TypeParams: []*commonpb.KeyValuePair{
					{Key: common.MaxLengthKey, Value: "128"},
				},
			},
			{
				FieldID:  101,
				Name:     "vec",
				DataType: schemapb.DataType_FloatVector,
				TypeParams: []*commonpb.KeyValuePair{
					{
						Key:   common.DimKey,
						Value: "4",
					},
				},
			},
			{
				FieldID:  102,
				Name:     "int64",
				DataType: schemapb.DataType_Int64,
			},
		},
	}

	s.manager = NewTaskManager()
	s.syncMgr = syncmgr.NewMockSyncManager(s.T())
	s.scheduler = NewScheduler(s.manager).(*scheduler)
}

// TestScheduler_Slots checks that adding one task reduces the available scheduler slots by one.
func (s *SchedulerSuite) TestScheduler_Slots() {
	preimportReq := &datapb.PreImportRequest{
		JobID:        1,
		TaskID:       2,
		CollectionID: 3,
		PartitionIDs: []int64{4},
		Vchannels:    []string{"ch-0"},
		Schema:       s.schema,
		ImportFiles:  []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}},
	}
	preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm)
	s.manager.Add(preimportTask)

	slots := s.scheduler.Slots()
	s.Equal(paramtable.Get().DataNodeCfg.MaxConcurrentImportTaskNum.GetAsInt64()-1, slots)
}

func (s *SchedulerSuite) TestScheduler_Start_Preimport() {
	content := &sampleContent{
		Rows: make([]sampleRow, 0),
	}
	for i := 0; i < 10; i++ {
		row := sampleRow{
			FieldString:      "No." + strconv.FormatInt(int64(i), 10),
			FieldInt64:       int64(99999999999999999 + i),
			FieldFloatVector: []float32{float32(i) + 0.1, float32(i) + 0.2, float32(i) + 0.3, float32(i) + 0.4},
		}
		content.Rows = append(content.Rows, row)
	}
	bytes, err := json.Marshal(content)
	s.NoError(err)

	cm := mocks.NewChunkManager(s.T())
	ioReader := strings.NewReader(string(bytes))
	cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil)
	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
	s.cm = cm

	preimportReq := &datapb.PreImportRequest{
		JobID:        1,
		TaskID:       2,
		CollectionID: 3,
		PartitionIDs: []int64{4},
		Vchannels:    []string{"ch-0"},
		Schema:       s.schema,
		ImportFiles:  []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}},
	}
	preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm)
	s.manager.Add(preimportTask)

	go s.scheduler.Start()
	defer s.scheduler.Close()
	s.Eventually(func() bool {
		return s.manager.Get(preimportTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Completed
	}, 10*time.Second, 100*time.Millisecond)
}

// TestScheduler_Start_Preimport_Failed feeds rows with inconsistent fields so the preimport task ends in the Failed state.
func (s *SchedulerSuite) TestScheduler_Start_Preimport_Failed() {
	content := &sampleContent{
		Rows: make([]sampleRow, 0),
	}
	for i := 0; i < 10; i++ {
		var row sampleRow
		if i == 0 { // make rows not consistent
			row = sampleRow{
				FieldString:      "No." + strconv.FormatInt(int64(i), 10),
				FieldFloatVector: []float32{float32(i) + 0.1, float32(i) + 0.2, float32(i) + 0.3, float32(i) + 0.4},
			}
		} else {
			row = sampleRow{
				FieldString:      "No." + strconv.FormatInt(int64(i), 10),
				FieldInt64:       int64(99999999999999999 + i),
				FieldFloatVector: []float32{float32(i) + 0.1, float32(i) + 0.2, float32(i) + 0.3, float32(i) + 0.4},
			}
		}
		content.Rows = append(content.Rows, row)
	}
	bytes, err := json.Marshal(content)
	s.NoError(err)

	cm := mocks.NewChunkManager(s.T())
	ioReader := strings.NewReader(string(bytes))
	cm.EXPECT().Size(mock.Anything, mock.Anything).Return(1024, nil)
	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
	s.cm = cm

	preimportReq := &datapb.PreImportRequest{
		JobID:        1,
		TaskID:       2,
		CollectionID: 3,
		PartitionIDs: []int64{4},
		Vchannels:    []string{"ch-0"},
		Schema:       s.schema,
		ImportFiles:  []*internalpb.ImportFile{{Paths: []string{"dummy.json"}}},
	}
	preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm)
	s.manager.Add(preimportTask)

	go s.scheduler.Start()
	defer s.scheduler.Close()
	s.Eventually(func() bool {
		return s.manager.Get(preimportTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Failed
	}, 10*time.Second, 100*time.Millisecond)
}

func (s *SchedulerSuite) TestScheduler_Start_Import() {
	content := &sampleContent{
		Rows: make([]sampleRow, 0),
	}
	for i := 0; i < 10; i++ {
		row := sampleRow{
			FieldString:      "No." + strconv.FormatInt(int64(i), 10),
			FieldInt64:       int64(99999999999999999 + i),
			FieldFloatVector: []float32{float32(i) + 0.1, float32(i) + 0.2, float32(i) + 0.3, float32(i) + 0.4},
		}
		content.Rows = append(content.Rows, row)
	}
	bytes, err := json.Marshal(content)
	s.NoError(err)

	cm := mocks.NewChunkManager(s.T())
	ioReader := strings.NewReader(string(bytes))
	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
	s.cm = cm

	s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task, callbacks ...func(error) error) *conc.Future[struct{}] {
		future := conc.Go(func() (struct{}, error) {
			return struct{}{}, nil
		})
		return future
	})

	importReq := &datapb.ImportRequest{
		JobID:        10,
		TaskID:       11,
		CollectionID: 12,
		PartitionIDs: []int64{13},
		Vchannels:    []string{"v0"},
		Schema:       s.schema,
		Files: []*internalpb.ImportFile{
			{
				Paths: []string{"dummy.json"},
			},
		},
		Ts: 1000,
		IDRange: &datapb.IDRange{
			Begin: 0,
			End:   int64(s.numRows),
		},
		RequestSegments: []*datapb.ImportRequestSegment{
			{
				SegmentID:   14,
				PartitionID: 13,
				Vchannel:    "v0",
			},
		},
	}
	importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm)
	s.manager.Add(importTask)

	go s.scheduler.Start()
	defer s.scheduler.Close()
	s.Eventually(func() bool {
		return s.manager.Get(importTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Completed
	}, 10*time.Second, 100*time.Millisecond)
}

// TestScheduler_Start_Import_Failed makes the mocked sync return an error so the import task ends in the Failed state.
func (s *SchedulerSuite) TestScheduler_Start_Import_Failed() {
	content := &sampleContent{
		Rows: make([]sampleRow, 0),
	}
	for i := 0; i < 10; i++ {
		row := sampleRow{
			FieldString:      "No." + strconv.FormatInt(int64(i), 10),
			FieldInt64:       int64(99999999999999999 + i),
			FieldFloatVector: []float32{float32(i) + 0.1, float32(i) + 0.2, float32(i) + 0.3, float32(i) + 0.4},
		}
		content.Rows = append(content.Rows, row)
	}
	bytes, err := json.Marshal(content)
	s.NoError(err)

	cm := mocks.NewChunkManager(s.T())
	ioReader := strings.NewReader(string(bytes))
	cm.EXPECT().Reader(mock.Anything, mock.Anything).Return(&mockReader{Reader: ioReader}, nil)
	s.cm = cm

	s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task, callbacks ...func(error) error) *conc.Future[struct{}] {
		future := conc.Go(func() (struct{}, error) {
			return struct{}{}, errors.New("mock err")
		})
		return future
	})

	importReq := &datapb.ImportRequest{
		JobID:        10,
		TaskID:       11,
		CollectionID: 12,
		PartitionIDs: []int64{13},
		Vchannels:    []string{"v0"},
		Schema:       s.schema,
		Files: []*internalpb.ImportFile{
			{
				Paths: []string{"dummy.json"},
			},
		},
		Ts: 1000,
		IDRange: &datapb.IDRange{
			Begin: 0,
			End:   int64(s.numRows),
		},
		RequestSegments: []*datapb.ImportRequestSegment{
			{
				SegmentID:   14,
				PartitionID: 13,
				Vchannel:    "v0",
			},
		},
	}
	importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm)
	s.manager.Add(importTask)

	go s.scheduler.Start()
	defer s.scheduler.Close()
	s.Eventually(func() bool {
		return s.manager.Get(importTask.GetTaskID()).GetState() == datapb.ImportTaskStateV2_Failed
	}, 10*time.Second, 100*time.Millisecond)
}

func (s *SchedulerSuite) TestScheduler_ReadFileStat() {
	importFile := &internalpb.ImportFile{
		Paths: []string{"dummy.json"},
	}

	// Return the generated data exactly once, then io.EOF on subsequent reads.
	var once sync.Once
	data, err := testutil.CreateInsertData(s.schema, s.numRows)
	s.NoError(err)
	s.reader = importutilv2.NewMockReader(s.T())
	s.reader.EXPECT().Size().Return(1024, nil)
	s.reader.EXPECT().Read().RunAndReturn(func() (*storage.InsertData, error) {
		var res *storage.InsertData
		once.Do(func() {
			res = data
		})
		if res != nil {
			return res, nil
		}
		return nil, io.EOF
	})
	preimportReq := &datapb.PreImportRequest{
		JobID:        1,
		TaskID:       2,
		CollectionID: 3,
		PartitionIDs: []int64{4},
		Vchannels:    []string{"ch-0"},
		Schema:       s.schema,
		ImportFiles:  []*internalpb.ImportFile{importFile},
	}
	preimportTask := NewPreImportTask(preimportReq, s.manager, s.cm)
	s.manager.Add(preimportTask)
	err = preimportTask.(*PreImportTask).readFileStat(s.reader, 0)
	s.NoError(err)
}

func (s *SchedulerSuite) TestScheduler_ImportFile() {
	s.syncMgr.EXPECT().SyncData(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, task syncmgr.Task, callbacks ...func(error) error) *conc.Future[struct{}] {
		future := conc.Go(func() (struct{}, error) {
			return struct{}{}, nil
		})
		return future
	})

	var once sync.Once
	data, err := testutil.CreateInsertData(s.schema, s.numRows)
	s.NoError(err)
	s.reader = importutilv2.NewMockReader(s.T())
	s.reader.EXPECT().Read().RunAndReturn(func() (*storage.InsertData, error) {
		var res *storage.InsertData
		once.Do(func() {
			res = data
		})
		if res != nil {
			return res, nil
		}
		return nil, io.EOF
	})

	importReq := &datapb.ImportRequest{
		JobID:        10,
		TaskID:       11,
		CollectionID: 12,
		PartitionIDs: []int64{13},
		Vchannels:    []string{"v0"},
		Schema:       s.schema,
		Files: []*internalpb.ImportFile{
			{
				Paths: []string{"dummy.json"},
			},
		},
		Ts: 1000,
		IDRange: &datapb.IDRange{
			Begin: 0,
			End:   int64(s.numRows),
		},
		RequestSegments: []*datapb.ImportRequestSegment{
			{
				SegmentID:   14,
				PartitionID: 13,
				Vchannel:    "v0",
			},
		},
	}
	importTask := NewImportTask(importReq, s.manager, s.syncMgr, s.cm)
	s.manager.Add(importTask)
	err = importTask.(*ImportTask).importFile(s.reader)
	s.NoError(err)
}

func TestScheduler(t *testing.T) {
	suite.Run(t, new(SchedulerSuite))
}