2022-09-30 10:32:54 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2022-03-30 16:25:30 +08:00
|
|
|
package importutil
|
|
|
|
|
|
|
|
import (
|
2022-04-12 22:19:34 +08:00
|
|
|
"bufio"
|
|
|
|
"bytes"
|
2022-03-30 16:25:30 +08:00
|
|
|
"context"
|
2022-04-01 10:07:28 +08:00
|
|
|
"encoding/json"
|
2022-09-26 18:06:54 +08:00
|
|
|
"errors"
|
2022-10-27 16:21:34 +08:00
|
|
|
"math"
|
|
|
|
"os"
|
2022-11-21 10:19:10 +08:00
|
|
|
"path"
|
2022-04-01 10:07:28 +08:00
|
|
|
"strconv"
|
2022-03-30 16:25:30 +08:00
|
|
|
"testing"
|
2022-07-22 22:10:28 +08:00
|
|
|
"time"
|
2022-03-30 16:25:30 +08:00
|
|
|
|
2022-04-12 22:19:34 +08:00
|
|
|
"github.com/stretchr/testify/assert"
|
2022-05-06 11:21:50 +08:00
|
|
|
"golang.org/x/exp/mmap"
|
2022-04-12 22:19:34 +08:00
|
|
|
|
2022-10-16 20:49:27 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/commonpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/schemapb"
|
2022-03-30 16:25:30 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/common"
|
2022-10-27 16:21:34 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/datapb"
|
2022-04-21 21:37:42 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
|
2022-03-30 16:25:30 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/storage"
|
2022-04-01 10:07:28 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/util/timerecord"
|
2022-03-30 16:25:30 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
TempFilesPath = "/tmp/milvus_test/import/"
|
|
|
|
)
|
|
|
|
|
2022-05-06 11:21:50 +08:00
|
|
|
// MockChunkManager is a storage.ChunkManager stub for unit tests.
// Each field configures the canned result or the injected error of
// one method; all other methods are inert no-ops.
type MockChunkManager struct {
	size       int64               // value returned by Size()
	sizeErr    error               // if non-nil, Size() fails with this error
	readBuf    map[string][]byte   // canned content returned by Read(), keyed by file path
	readErr    error               // if non-nil, Read() fails with this error
	listResult map[string][]string // canned results for ListWithPrefix(), keyed by prefix
	listErr    error               // if non-nil, ListWithPrefix() fails with this error
}
|
|
|
|
|
2022-08-25 19:32:53 +08:00
|
|
|
// RootPath returns the fixed test root directory.
func (mc *MockChunkManager) RootPath() string {
	return TempFilesPath
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// Path is an unused stub; it always succeeds with an empty path.
func (mc *MockChunkManager) Path(ctx context.Context, filePath string) (string, error) {
	return "", nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// Reader is an unused stub; it always succeeds with a nil reader.
func (mc *MockChunkManager) Reader(ctx context.Context, filePath string) (storage.FileReader, error) {
	return nil, nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// Write is an unused stub; it discards the content and always succeeds.
func (mc *MockChunkManager) Write(ctx context.Context, filePath string, content []byte) error {
	return nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// MultiWrite is an unused stub; it discards the contents and always succeeds.
func (mc *MockChunkManager) MultiWrite(ctx context.Context, contents map[string][]byte) error {
	return nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// Exist is a stub; it reports that every path exists.
func (mc *MockChunkManager) Exist(ctx context.Context, filePath string) (bool, error) {
	return true, nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
func (mc *MockChunkManager) Read(ctx context.Context, filePath string) ([]byte, error) {
|
2022-09-30 10:32:54 +08:00
|
|
|
if mc.readErr != nil {
|
|
|
|
return nil, mc.readErr
|
|
|
|
}
|
|
|
|
|
|
|
|
val, ok := mc.readBuf[filePath]
|
|
|
|
if !ok {
|
|
|
|
return nil, errors.New("mock chunk manager: file path not found: " + filePath)
|
|
|
|
}
|
|
|
|
|
|
|
|
return val, nil
|
2022-05-06 11:21:50 +08:00
|
|
|
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// MultiRead is an unused stub; it always succeeds with no data.
func (mc *MockChunkManager) MultiRead(ctx context.Context, filePaths []string) ([][]byte, error) {
	return nil, nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
func (mc *MockChunkManager) ListWithPrefix(ctx context.Context, prefix string, recursive bool) ([]string, []time.Time, error) {
|
2022-09-30 10:32:54 +08:00
|
|
|
if mc.listErr != nil {
|
|
|
|
return nil, nil, mc.listErr
|
|
|
|
}
|
|
|
|
|
|
|
|
result, ok := mc.listResult[prefix]
|
|
|
|
if ok {
|
|
|
|
return result, nil, nil
|
|
|
|
}
|
|
|
|
|
2022-07-22 22:10:28 +08:00
|
|
|
return nil, nil, nil
|
2022-05-06 11:21:50 +08:00
|
|
|
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// ReadWithPrefix is an unused stub; it always succeeds with no data.
func (mc *MockChunkManager) ReadWithPrefix(ctx context.Context, prefix string) ([]string, [][]byte, error) {
	return nil, nil, nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// ReadAt is an unused stub; it always succeeds with no data.
func (mc *MockChunkManager) ReadAt(ctx context.Context, filePath string, off int64, length int64) ([]byte, error) {
	return nil, nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// Mmap is an unused stub; it always succeeds with a nil reader.
func (mc *MockChunkManager) Mmap(ctx context.Context, filePath string) (*mmap.ReaderAt, error) {
	return nil, nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
func (mc *MockChunkManager) Size(ctx context.Context, filePath string) (int64, error) {
|
2022-09-30 10:32:54 +08:00
|
|
|
if mc.sizeErr != nil {
|
|
|
|
return 0, mc.sizeErr
|
|
|
|
}
|
|
|
|
|
2022-05-06 11:21:50 +08:00
|
|
|
return mc.size, nil
|
|
|
|
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// Remove is an unused stub; it always succeeds.
func (mc *MockChunkManager) Remove(ctx context.Context, filePath string) error {
	return nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// MultiRemove is an unused stub; it always succeeds.
func (mc *MockChunkManager) MultiRemove(ctx context.Context, filePaths []string) error {
	return nil
}
|
|
|
|
|
2022-09-29 16:18:56 +08:00
|
|
|
// RemoveWithPrefix is an unused stub; it always succeeds.
func (mc *MockChunkManager) RemoveWithPrefix(ctx context.Context, prefix string) error {
	return nil
}
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
// rowCounterTest accumulates statistics from the mocked create-binlogs
// callback so tests can assert how much data was flushed.
type rowCounterTest struct {
	rowCount int // total number of rows passed to the callback across all calls
	callTime int // number of times the callback was invoked
}
|
|
|
|
|
|
|
|
func createMockCallbackFunctions(t *testing.T, rowCounter *rowCounterTest) (AssignSegmentFunc, CreateBinlogsFunc, SaveSegmentFunc) {
|
|
|
|
createBinlogFunc := func(fields map[storage.FieldID]storage.FieldData, segmentID int64) ([]*datapb.FieldBinlog, []*datapb.FieldBinlog, error) {
|
|
|
|
count := 0
|
|
|
|
for _, data := range fields {
|
|
|
|
assert.Less(t, 0, data.RowNum())
|
|
|
|
if count == 0 {
|
|
|
|
count = data.RowNum()
|
|
|
|
} else {
|
|
|
|
assert.Equal(t, count, data.RowNum())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
rowCounter.rowCount += count
|
|
|
|
rowCounter.callTime++
|
|
|
|
return nil, nil, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
assignSegmentFunc := func(shardID int) (int64, string, error) {
|
|
|
|
return 100, "ch", nil
|
|
|
|
}
|
|
|
|
|
|
|
|
saveSegmentFunc := func(fieldsInsert []*datapb.FieldBinlog, fieldsStats []*datapb.FieldBinlog, segmentID int64, targetChName string, rowCount int64) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return assignSegmentFunc, createBinlogFunc, saveSegmentFunc
|
|
|
|
}
|
|
|
|
|
2022-03-30 16:25:30 +08:00
|
|
|
// Test_NewImportWrapper verifies constructor validation (a nil schema is
// rejected) and the argument validation of SetCallbackFunctions (all three
// callbacks must be non-nil).
func Test_NewImportWrapper(t *testing.T) {
	// NewDefaultFactory() use "/tmp/milvus" as default root path, and cannot specify root path
	// NewChunkManagerFactory() can specify the root path
	f := storage.NewChunkManagerFactory("local", storage.RootPath(TempFilesPath))
	ctx := context.Background()
	cm, err := f.NewPersistentStorageChunkManager(ctx)
	assert.NoError(t, err)
	// a nil schema must make the constructor return nil
	wrapper := NewImportWrapper(ctx, nil, 2, 1, nil, cm, nil, nil)
	assert.Nil(t, wrapper)

	// build a valid schema: the sample fields plus an explicit row-ID field
	schema := &schemapb.CollectionSchema{
		Name:        "schema",
		Description: "schema",
		AutoID:      true,
		Fields:      make([]*schemapb.FieldSchema, 0),
	}
	schema.Fields = append(schema.Fields, sampleSchema().Fields...)
	schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
		FieldID:      106,
		Name:         common.RowIDFieldName,
		IsPrimaryKey: true,
		AutoID:       false,
		Description:  "int64",
		DataType:     schemapb.DataType_Int64,
	})
	wrapper = NewImportWrapper(ctx, schema, 2, 1, nil, cm, nil, nil)
	assert.NotNil(t, wrapper)

	// minimal no-op callbacks used to exercise SetCallbackFunctions validation
	assignSegFunc := func(shardID int) (int64, string, error) {
		return 0, "", nil
	}
	createBinFunc := func(fields map[storage.FieldID]storage.FieldData, segmentID int64) ([]*datapb.FieldBinlog, []*datapb.FieldBinlog, error) {
		return nil, nil, nil
	}
	saveBinFunc := func(fieldsInsert []*datapb.FieldBinlog, fieldsStats []*datapb.FieldBinlog, segmentID int64, targetChName string, rowCount int64) error {
		return nil
	}

	// all three callbacks provided: accepted
	err = wrapper.SetCallbackFunctions(assignSegFunc, createBinFunc, saveBinFunc)
	assert.Nil(t, err)
	// any nil callback must be rejected
	err = wrapper.SetCallbackFunctions(assignSegFunc, createBinFunc, nil)
	assert.NotNil(t, err)
	err = wrapper.SetCallbackFunctions(assignSegFunc, nil, nil)
	assert.NotNil(t, err)
	err = wrapper.SetCallbackFunctions(nil, nil, nil)
	assert.NotNil(t, err)

	// cancelling an idle wrapper succeeds
	err = wrapper.Cancel()
	assert.Nil(t, err)
}
|
|
|
|
|
2022-09-30 10:32:54 +08:00
|
|
|
func Test_ImportWrapperRowBased(t *testing.T) {
|
2022-10-27 16:21:34 +08:00
|
|
|
err := os.MkdirAll(TempFilesPath, os.ModePerm)
|
|
|
|
assert.Nil(t, err)
|
|
|
|
defer os.RemoveAll(TempFilesPath)
|
|
|
|
|
2022-11-02 10:23:35 +08:00
|
|
|
// NewDefaultFactory() use "/tmp/milvus" as default root path, and cannot specify root path
|
|
|
|
// NewChunkManagerFactory() can specify the root path
|
|
|
|
f := storage.NewChunkManagerFactory("local", storage.RootPath(TempFilesPath))
|
2022-03-30 16:25:30 +08:00
|
|
|
ctx := context.Background()
|
2022-09-23 14:40:51 +08:00
|
|
|
cm, err := f.NewPersistentStorageChunkManager(ctx)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-03-30 16:25:30 +08:00
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
idAllocator := newIDAllocator(ctx, t, nil)
|
2022-03-30 16:25:30 +08:00
|
|
|
|
|
|
|
content := []byte(`{
|
|
|
|
"rows":[
|
2022-11-16 19:05:08 +08:00
|
|
|
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]},
|
|
|
|
{"FieldBool": false, "FieldInt8": 11, "FieldInt16": 102, "FieldInt32": 1002, "FieldInt64": 10002, "FieldFloat": 3.15, "FieldDouble": 2.56, "FieldString": "hello world", "FieldBinaryVector": [253, 0], "FieldFloatVector": [2.1, 2.2, 2.3, 2.4]},
|
|
|
|
{"FieldBool": true, "FieldInt8": 12, "FieldInt16": 103, "FieldInt32": 1003, "FieldInt64": 10003, "FieldFloat": 3.16, "FieldDouble": 3.56, "FieldString": "hello world", "FieldBinaryVector": [252, 0], "FieldFloatVector": [3.1, 3.2, 3.3, 3.4]},
|
|
|
|
{"FieldBool": false, "FieldInt8": 13, "FieldInt16": 104, "FieldInt32": 1004, "FieldInt64": 10004, "FieldFloat": 3.17, "FieldDouble": 4.56, "FieldString": "hello world", "FieldBinaryVector": [251, 0], "FieldFloatVector": [4.1, 4.2, 4.3, 4.4]},
|
|
|
|
{"FieldBool": true, "FieldInt8": 14, "FieldInt16": 105, "FieldInt32": 1005, "FieldInt64": 10005, "FieldFloat": 3.18, "FieldDouble": 5.56, "FieldString": "hello world", "FieldBinaryVector": [250, 0], "FieldFloatVector": [5.1, 5.2, 5.3, 5.4]}
|
2022-03-30 16:25:30 +08:00
|
|
|
]
|
|
|
|
}`)
|
|
|
|
|
|
|
|
filePath := TempFilesPath + "rows_1.json"
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-11-21 10:19:10 +08:00
|
|
|
defer cm.RemoveWithPrefix(ctx, cm.RootPath())
|
2022-03-30 16:25:30 +08:00
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
rowCounter := &rowCounterTest{}
|
|
|
|
assignSegmentFunc, flushFunc, saveSegmentFunc := createMockCallbackFunctions(t, rowCounter)
|
2022-03-30 16:25:30 +08:00
|
|
|
|
|
|
|
// success case
|
2022-04-21 21:37:42 +08:00
|
|
|
importResult := &rootcoordpb.ImportResult{
|
|
|
|
Status: &commonpb.Status{
|
|
|
|
ErrorCode: commonpb.ErrorCode_Success,
|
|
|
|
},
|
|
|
|
TaskId: 1,
|
|
|
|
DatanodeId: 1,
|
|
|
|
State: commonpb.ImportState_ImportStarted,
|
|
|
|
Segments: make([]int64, 0),
|
|
|
|
AutoIds: make([]int64, 0),
|
|
|
|
RowCount: 0,
|
|
|
|
}
|
|
|
|
reportFunc := func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return nil
|
|
|
|
}
|
2022-10-27 16:21:34 +08:00
|
|
|
wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, importResult, reportFunc)
|
|
|
|
wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)
|
2022-03-30 16:25:30 +08:00
|
|
|
files := make([]string, 0)
|
|
|
|
files = append(files, filePath)
|
2022-10-27 16:21:34 +08:00
|
|
|
err = wrapper.Import(files, ImportOptions{OnlyValidate: true})
|
|
|
|
assert.Nil(t, err)
|
|
|
|
assert.Equal(t, 0, rowCounter.rowCount)
|
|
|
|
|
|
|
|
err = wrapper.Import(files, DefaultImportOptions())
|
2022-03-30 16:25:30 +08:00
|
|
|
assert.Nil(t, err)
|
2022-10-27 16:21:34 +08:00
|
|
|
assert.Equal(t, 5, rowCounter.rowCount)
|
2022-04-21 21:37:42 +08:00
|
|
|
assert.Equal(t, commonpb.ImportState_ImportPersisted, importResult.State)
|
2022-03-30 16:25:30 +08:00
|
|
|
|
|
|
|
// parse error
|
|
|
|
content = []byte(`{
|
|
|
|
"rows":[
|
2022-11-16 19:05:08 +08:00
|
|
|
{"FieldBool": true, "FieldInt8": false, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]},
|
2022-03-30 16:25:30 +08:00
|
|
|
]
|
|
|
|
}`)
|
|
|
|
|
|
|
|
filePath = TempFilesPath + "rows_2.json"
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-03-30 16:25:30 +08:00
|
|
|
|
2022-04-21 21:37:42 +08:00
|
|
|
importResult.State = commonpb.ImportState_ImportStarted
|
2022-10-27 16:21:34 +08:00
|
|
|
wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, importResult, reportFunc)
|
|
|
|
wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)
|
2022-03-30 16:25:30 +08:00
|
|
|
files = make([]string, 0)
|
|
|
|
files = append(files, filePath)
|
2022-10-27 16:21:34 +08:00
|
|
|
err = wrapper.Import(files, ImportOptions{OnlyValidate: true})
|
2022-03-30 16:25:30 +08:00
|
|
|
assert.NotNil(t, err)
|
2022-04-21 21:37:42 +08:00
|
|
|
assert.NotEqual(t, commonpb.ImportState_ImportPersisted, importResult.State)
|
2022-03-30 16:25:30 +08:00
|
|
|
|
|
|
|
// file doesn't exist
|
|
|
|
files = make([]string, 0)
|
|
|
|
files = append(files, "/dummy/dummy.json")
|
2022-10-27 16:21:34 +08:00
|
|
|
err = wrapper.Import(files, ImportOptions{OnlyValidate: true})
|
2022-03-30 16:25:30 +08:00
|
|
|
assert.NotNil(t, err)
|
|
|
|
}
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
func createSampleNumpyFiles(t *testing.T, cm storage.ChunkManager) []string {
|
2022-03-30 16:25:30 +08:00
|
|
|
ctx := context.Background()
|
2022-10-27 16:21:34 +08:00
|
|
|
files := make([]string, 0)
|
2022-03-30 16:25:30 +08:00
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath := path.Join(cm.RootPath(), "FieldBool.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err := CreateNumpyData([]bool{true, false, true, true, true})
|
|
|
|
assert.Nil(t, err)
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-03-30 16:25:30 +08:00
|
|
|
files = append(files, filePath)
|
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldInt8.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([]int8{10, 11, 12, 13, 14})
|
|
|
|
assert.Nil(t, err)
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-04-03 11:27:29 +08:00
|
|
|
files = append(files, filePath)
|
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldInt16.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([]int16{100, 101, 102, 103, 104})
|
|
|
|
assert.Nil(t, err)
|
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-05-13 16:07:54 +08:00
|
|
|
assert.NoError(t, err)
|
2022-10-27 16:21:34 +08:00
|
|
|
files = append(files, filePath)
|
2022-05-13 16:07:54 +08:00
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldInt32.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([]int32{1000, 1001, 1002, 1003, 1004})
|
|
|
|
assert.Nil(t, err)
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-05-13 16:07:54 +08:00
|
|
|
assert.NoError(t, err)
|
|
|
|
files = append(files, filePath)
|
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldInt64.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([]int64{10000, 10001, 10002, 10003, 10004})
|
|
|
|
assert.Nil(t, err)
|
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-10-27 16:21:34 +08:00
|
|
|
files = append(files, filePath)
|
2022-04-03 11:27:29 +08:00
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldFloat.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([]float32{3.14, 3.15, 3.16, 3.17, 3.18})
|
|
|
|
assert.Nil(t, err)
|
|
|
|
err = cm.Write(ctx, filePath, content)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
files = append(files, filePath)
|
2022-04-03 11:27:29 +08:00
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldDouble.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([]float64{5.1, 5.2, 5.3, 5.4, 5.5})
|
|
|
|
assert.Nil(t, err)
|
|
|
|
err = cm.Write(ctx, filePath, content)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
files = append(files, filePath)
|
2022-04-03 11:27:29 +08:00
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldString.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([]string{"a", "bb", "ccc", "dd", "e"})
|
|
|
|
assert.Nil(t, err)
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-04-03 11:27:29 +08:00
|
|
|
files = append(files, filePath)
|
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldBinaryVector.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([][2]uint8{{1, 2}, {3, 4}, {5, 6}, {7, 8}, {9, 10}})
|
2022-04-03 11:27:29 +08:00
|
|
|
assert.Nil(t, err)
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-04-03 11:27:29 +08:00
|
|
|
files = append(files, filePath)
|
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath = path.Join(cm.RootPath(), "FieldFloatVector.npy")
|
2022-10-27 16:21:34 +08:00
|
|
|
content, err = CreateNumpyData([][4]float32{{1, 2, 3, 4}, {3, 4, 5, 6}, {5, 6, 7, 8}, {7, 8, 9, 10}, {9, 10, 11, 12}})
|
2022-04-03 11:27:29 +08:00
|
|
|
assert.Nil(t, err)
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-04-12 22:19:34 +08:00
|
|
|
assert.NoError(t, err)
|
2022-04-03 11:27:29 +08:00
|
|
|
files = append(files, filePath)
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
return files
|
|
|
|
}
|
|
|
|
|
|
|
|
// Test_ImportWrapperColumnBased_numpy exercises the column-based (numpy)
// import path: a successful import of one .npy file per field, a mismatched
// per-field row count, and a missing input file.
func Test_ImportWrapperColumnBased_numpy(t *testing.T) {
	err := os.MkdirAll(TempFilesPath, os.ModePerm)
	assert.Nil(t, err)
	defer os.RemoveAll(TempFilesPath)

	// NewDefaultFactory() use "/tmp/milvus" as default root path, and cannot specify root path
	// NewChunkManagerFactory() can specify the root path
	f := storage.NewChunkManagerFactory("local", storage.RootPath(TempFilesPath))
	ctx := context.Background()
	cm, err := f.NewPersistentStorageChunkManager(ctx)
	assert.NoError(t, err)
	defer cm.RemoveWithPrefix(ctx, cm.RootPath())

	idAllocator := newIDAllocator(ctx, t, nil)

	rowCounter := &rowCounterTest{}
	assignSegmentFunc, flushFunc, saveSegmentFunc := createMockCallbackFunctions(t, rowCounter)

	// success case
	importResult := &rootcoordpb.ImportResult{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		},
		TaskId:     1,
		DatanodeId: 1,
		State:      commonpb.ImportState_ImportStarted,
		Segments:   make([]int64, 0),
		AutoIds:    make([]int64, 0),
		RowCount:   0,
	}
	reportFunc := func(res *rootcoordpb.ImportResult) error {
		return nil
	}
	schema := sampleSchema()
	wrapper := NewImportWrapper(ctx, schema, 2, 1, idAllocator, cm, importResult, reportFunc)
	wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)

	// one .npy file per field, 5 rows each
	files := createSampleNumpyFiles(t, cm)
	err = wrapper.Import(files, DefaultImportOptions())
	assert.Nil(t, err)
	assert.Equal(t, 5, rowCounter.rowCount)
	assert.Equal(t, commonpb.ImportState_ImportPersisted, importResult.State)

	// row count of fields not equal
	// overwrite FieldInt8 with a 1-row file while the others keep 5 rows
	filePath := path.Join(cm.RootPath(), "FieldInt8.npy")
	content, err := CreateNumpyData([]int8{10})
	assert.Nil(t, err)
	err = cm.Write(ctx, filePath, content)
	assert.NoError(t, err)
	files[1] = filePath

	importResult.State = commonpb.ImportState_ImportStarted
	wrapper = NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, importResult, reportFunc)
	wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)

	err = wrapper.Import(files, DefaultImportOptions())
	assert.NotNil(t, err)
	assert.NotEqual(t, commonpb.ImportState_ImportPersisted, importResult.State)

	// file doesn't exist
	files = make([]string, 0)
	files = append(files, "/dummy/dummy.npy")
	err = wrapper.Import(files, DefaultImportOptions())
	assert.NotNil(t, err)
}
|
2022-04-01 10:07:28 +08:00
|
|
|
|
|
|
|
// perfSchema builds a minimal two-field schema (an int64 primary key and a
// float vector of the given dimension) for the performance tests.
func perfSchema(dim int) *schemapb.CollectionSchema {
	schema := &schemapb.CollectionSchema{
		Name:        "schema",
		Description: "schema",
		AutoID:      true,
		Fields: []*schemapb.FieldSchema{
			{
				FieldID:      101,
				Name:         "ID",
				IsPrimaryKey: true,
				AutoID:       false,
				Description:  "int64",
				DataType:     schemapb.DataType_Int64,
			},
			{
				FieldID:      102,
				Name:         "Vector",
				IsPrimaryKey: false,
				Description:  "float_vector",
				DataType:     schemapb.DataType_FloatVector,
				TypeParams: []*commonpb.KeyValuePair{
					{Key: "dim", Value: strconv.Itoa(dim)},
				},
			},
		},
	}

	return schema
}
|
|
|
|
|
2022-09-30 10:32:54 +08:00
|
|
|
// Test_ImportWrapperRowBased_perf generates a large row-based JSON file
// in memory, imports it, and records timings for each phase via
// timerecord. It also asserts that every generated row is flushed.
func Test_ImportWrapperRowBased_perf(t *testing.T) {
	err := os.MkdirAll(TempFilesPath, os.ModePerm)
	assert.Nil(t, err)
	defer os.RemoveAll(TempFilesPath)

	// NewDefaultFactory() use "/tmp/milvus" as default root path, and cannot specify root path
	// NewChunkManagerFactory() can specify the root path
	f := storage.NewChunkManagerFactory("local", storage.RootPath(TempFilesPath))
	ctx := context.Background()
	cm, err := f.NewPersistentStorageChunkManager(ctx)
	assert.NoError(t, err)
	defer cm.RemoveWithPrefix(ctx, cm.RootPath())

	idAllocator := newIDAllocator(ctx, t, nil)

	tr := timerecord.NewTimeRecorder("row-based parse performance")

	// Entity/Entities mirror the JSON layout expected by the row-based parser.
	type Entity struct {
		ID     int64
		Vector []float32
	}

	type Entities struct {
		Rows []*Entity
	}

	// change these parameters to test different cases
	dim := 128
	rowCount := 10000
	shardNum := 2
	segmentSize := 512 // unit: MB

	// generate rows data
	entities := &Entities{
		Rows: make([]*Entity, 0),
	}

	for i := 0; i < rowCount; i++ {
		entity := &Entity{
			ID:     int64(i),
			Vector: make([]float32, 0, dim),
		}
		for k := 0; k < dim; k++ {
			entity.Vector = append(entity.Vector, float32(i)+3.1415926)
		}
		entities.Rows = append(entities.Rows, entity)
	}
	tr.Record("generate " + strconv.Itoa(rowCount) + " rows")

	// generate a json file
	filePath := path.Join(cm.RootPath(), "row_perf.json")
	func() {
		var b bytes.Buffer
		bw := bufio.NewWriter(&b)

		// stream-encode the rows, then persist the buffer in one write
		encoder := json.NewEncoder(bw)
		err = encoder.Encode(entities)
		assert.Nil(t, err)
		err = bw.Flush()
		assert.NoError(t, err)
		err = cm.Write(ctx, filePath, b.Bytes())
		assert.NoError(t, err)
	}()
	tr.Record("generate large json file: " + filePath)

	rowCounter := &rowCounterTest{}
	assignSegmentFunc, flushFunc, saveSegmentFunc := createMockCallbackFunctions(t, rowCounter)

	schema := perfSchema(dim)

	importResult := &rootcoordpb.ImportResult{
		Status: &commonpb.Status{
			ErrorCode: commonpb.ErrorCode_Success,
		},
		TaskId:     1,
		DatanodeId: 1,
		State:      commonpb.ImportState_ImportStarted,
		Segments:   make([]int64, 0),
		AutoIds:    make([]int64, 0),
		RowCount:   0,
	}
	reportFunc := func(res *rootcoordpb.ImportResult) error {
		return nil
	}
	wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, importResult, reportFunc)
	wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)

	files := make([]string, 0)
	files = append(files, filePath)
	err = wrapper.Import(files, DefaultImportOptions())
	assert.Nil(t, err)
	// every generated row must have been flushed
	assert.Equal(t, rowCount, rowCounter.rowCount)

	tr.Record("parse large json file " + filePath)
}
|
|
|
|
|
2022-09-30 10:32:54 +08:00
|
|
|
// Test_ImportWrapperFileValidation covers fileValidation(): file-type and
// duplicate checks, row-based vs column-based detection, empty input, the
// MaxFileSize limit, and a Size() failure from the chunk manager.
//
// NOTE(review): the subtests mutate the shared cm (size, sizeErr) and
// rebuild wrapper in place, so they are order-dependent.
func Test_ImportWrapperFileValidation(t *testing.T) {
	ctx := context.Background()

	cm := &MockChunkManager{
		size: 1,
	}

	idAllocator := newIDAllocator(ctx, t, nil)
	schema := &schemapb.CollectionSchema{
		Name:   "schema",
		AutoID: true,
		Fields: []*schemapb.FieldSchema{
			{
				FieldID:      101,
				Name:         "uid",
				IsPrimaryKey: true,
				AutoID:       true,
				DataType:     schemapb.DataType_Int64,
			},
			{
				FieldID:      102,
				Name:         "bol",
				IsPrimaryKey: false,
				DataType:     schemapb.DataType_Bool,
			},
		},
	}
	shardNum := 2
	segmentSize := 512 // unit: MB

	wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil)

	t.Run("unsupported file type", func(t *testing.T) {
		files := []string{"uid.txt"}
		rowBased, err := wrapper.fileValidation(files)
		assert.Error(t, err)
		assert.False(t, rowBased)
	})

	t.Run("duplicate files", func(t *testing.T) {
		// same base name in different directories counts as a duplicate
		files := []string{"a/1.json", "b/1.json"}
		rowBased, err := wrapper.fileValidation(files)
		assert.Error(t, err)
		assert.True(t, rowBased)

		files = []string{"a/uid.npy", "uid.npy", "b/bol.npy"}
		rowBased, err = wrapper.fileValidation(files)
		assert.Error(t, err)
		assert.False(t, rowBased)
	})

	t.Run("unsupported file for row-based", func(t *testing.T) {
		// the first file (.json) decides row-based; .npy is then rejected
		files := []string{"a/uid.json", "b/bol.npy"}
		rowBased, err := wrapper.fileValidation(files)
		assert.Error(t, err)
		assert.True(t, rowBased)
	})

	t.Run("unsupported file for column-based", func(t *testing.T) {
		// the first file (.npy) decides column-based; .json is then rejected
		files := []string{"a/uid.npy", "b/bol.json"}
		rowBased, err := wrapper.fileValidation(files)
		assert.Error(t, err)
		assert.False(t, rowBased)
	})

	t.Run("valid cases", func(t *testing.T) {
		files := []string{"a/1.json", "b/2.json"}
		rowBased, err := wrapper.fileValidation(files)
		assert.NoError(t, err)
		assert.True(t, rowBased)

		files = []string{"a/uid.npy", "b/bol.npy"}
		rowBased, err = wrapper.fileValidation(files)
		assert.NoError(t, err)
		assert.False(t, rowBased)
	})

	t.Run("empty file", func(t *testing.T) {
		files := []string{}
		cm.size = 0
		wrapper = NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil)
		rowBased, err := wrapper.fileValidation(files)
		assert.NoError(t, err)
		assert.False(t, rowBased)
	})

	t.Run("file size exceed MaxFileSize limit", func(t *testing.T) {
		files := []string{"a/1.json"}
		cm.size = MaxFileSize + 1
		wrapper = NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil)
		rowBased, err := wrapper.fileValidation(files)
		assert.NotNil(t, err)
		assert.True(t, rowBased)
	})

	t.Run("failed to get file size", func(t *testing.T) {
		files := []string{"a/1.json"}
		cm.sizeErr = errors.New("error")
		rowBased, err := wrapper.fileValidation(files)
		assert.NotNil(t, err)
		assert.True(t, rowBased)
	})
}
|
2022-09-26 18:06:54 +08:00
|
|
|
|
2022-09-30 10:32:54 +08:00
|
|
|
func Test_ImportWrapperReportFailRowBased(t *testing.T) {
|
2022-10-27 16:21:34 +08:00
|
|
|
err := os.MkdirAll(TempFilesPath, os.ModePerm)
|
|
|
|
assert.Nil(t, err)
|
|
|
|
defer os.RemoveAll(TempFilesPath)
|
|
|
|
|
2022-11-02 10:23:35 +08:00
|
|
|
// NewDefaultFactory() use "/tmp/milvus" as default root path, and cannot specify root path
|
|
|
|
// NewChunkManagerFactory() can specify the root path
|
|
|
|
f := storage.NewChunkManagerFactory("local", storage.RootPath(TempFilesPath))
|
2022-09-26 18:06:54 +08:00
|
|
|
ctx := context.Background()
|
|
|
|
cm, err := f.NewPersistentStorageChunkManager(ctx)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
idAllocator := newIDAllocator(ctx, t, nil)
|
2022-09-26 18:06:54 +08:00
|
|
|
|
|
|
|
content := []byte(`{
|
|
|
|
"rows":[
|
2022-11-16 19:05:08 +08:00
|
|
|
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]},
|
|
|
|
{"FieldBool": false, "FieldInt8": 11, "FieldInt16": 102, "FieldInt32": 1002, "FieldInt64": 10002, "FieldFloat": 3.15, "FieldDouble": 2.56, "FieldString": "hello world", "FieldBinaryVector": [253, 0], "FieldFloatVector": [2.1, 2.2, 2.3, 2.4]},
|
|
|
|
{"FieldBool": true, "FieldInt8": 12, "FieldInt16": 103, "FieldInt32": 1003, "FieldInt64": 10003, "FieldFloat": 3.16, "FieldDouble": 3.56, "FieldString": "hello world", "FieldBinaryVector": [252, 0], "FieldFloatVector": [3.1, 3.2, 3.3, 3.4]},
|
|
|
|
{"FieldBool": false, "FieldInt8": 13, "FieldInt16": 104, "FieldInt32": 1004, "FieldInt64": 10004, "FieldFloat": 3.17, "FieldDouble": 4.56, "FieldString": "hello world", "FieldBinaryVector": [251, 0], "FieldFloatVector": [4.1, 4.2, 4.3, 4.4]},
|
|
|
|
{"FieldBool": true, "FieldInt8": 14, "FieldInt16": 105, "FieldInt32": 1005, "FieldInt64": 10005, "FieldFloat": 3.18, "FieldDouble": 5.56, "FieldString": "hello world", "FieldBinaryVector": [250, 0], "FieldFloatVector": [5.1, 5.2, 5.3, 5.4]}
|
2022-09-26 18:06:54 +08:00
|
|
|
]
|
|
|
|
}`)
|
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
filePath := path.Join(cm.RootPath(), "rows_1.json")
|
2022-09-29 16:18:56 +08:00
|
|
|
err = cm.Write(ctx, filePath, content)
|
2022-09-26 18:06:54 +08:00
|
|
|
assert.NoError(t, err)
|
2022-11-21 10:19:10 +08:00
|
|
|
defer cm.RemoveWithPrefix(ctx, cm.RootPath())
|
2022-09-26 18:06:54 +08:00
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
rowCounter := &rowCounterTest{}
|
|
|
|
assignSegmentFunc, flushFunc, saveSegmentFunc := createMockCallbackFunctions(t, rowCounter)
|
2022-09-26 18:06:54 +08:00
|
|
|
|
|
|
|
// success case
|
|
|
|
importResult := &rootcoordpb.ImportResult{
|
|
|
|
Status: &commonpb.Status{
|
|
|
|
ErrorCode: commonpb.ErrorCode_Success,
|
|
|
|
},
|
|
|
|
TaskId: 1,
|
|
|
|
DatanodeId: 1,
|
|
|
|
State: commonpb.ImportState_ImportStarted,
|
|
|
|
Segments: make([]int64, 0),
|
|
|
|
AutoIds: make([]int64, 0),
|
|
|
|
RowCount: 0,
|
|
|
|
}
|
|
|
|
reportFunc := func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return nil
|
|
|
|
}
|
2022-10-27 16:21:34 +08:00
|
|
|
wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, idAllocator, cm, importResult, reportFunc)
|
|
|
|
wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)
|
2022-09-26 18:06:54 +08:00
|
|
|
|
2022-11-21 10:19:10 +08:00
|
|
|
files := []string{filePath}
|
2022-11-07 17:11:02 +08:00
|
|
|
wrapper.reportImportAttempts = 2
|
2022-09-26 18:06:54 +08:00
|
|
|
wrapper.reportFunc = func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return errors.New("mock error")
|
|
|
|
}
|
2022-10-27 16:21:34 +08:00
|
|
|
err = wrapper.Import(files, DefaultImportOptions())
|
2022-09-26 18:06:54 +08:00
|
|
|
assert.NotNil(t, err)
|
2022-10-27 16:21:34 +08:00
|
|
|
assert.Equal(t, 5, rowCounter.rowCount)
|
2022-09-26 18:06:54 +08:00
|
|
|
assert.Equal(t, commonpb.ImportState_ImportPersisted, importResult.State)
|
|
|
|
}
|
|
|
|
|
2022-09-30 10:32:54 +08:00
|
|
|
func Test_ImportWrapperReportFailColumnBased_numpy(t *testing.T) {
|
2022-10-27 16:21:34 +08:00
|
|
|
err := os.MkdirAll(TempFilesPath, os.ModePerm)
|
|
|
|
assert.Nil(t, err)
|
|
|
|
defer os.RemoveAll(TempFilesPath)
|
|
|
|
|
2022-11-02 10:23:35 +08:00
|
|
|
// NewDefaultFactory() use "/tmp/milvus" as default root path, and cannot specify root path
|
|
|
|
// NewChunkManagerFactory() can specify the root path
|
|
|
|
f := storage.NewChunkManagerFactory("local", storage.RootPath(TempFilesPath))
|
2022-09-26 18:06:54 +08:00
|
|
|
ctx := context.Background()
|
|
|
|
cm, err := f.NewPersistentStorageChunkManager(ctx)
|
|
|
|
assert.NoError(t, err)
|
2022-11-21 10:19:10 +08:00
|
|
|
defer cm.RemoveWithPrefix(ctx, cm.RootPath())
|
2022-09-26 18:06:54 +08:00
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
idAllocator := newIDAllocator(ctx, t, nil)
|
2022-09-26 18:06:54 +08:00
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
rowCounter := &rowCounterTest{}
|
|
|
|
assignSegmentFunc, flushFunc, saveSegmentFunc := createMockCallbackFunctions(t, rowCounter)
|
2022-09-26 18:06:54 +08:00
|
|
|
|
|
|
|
// success case
|
|
|
|
importResult := &rootcoordpb.ImportResult{
|
|
|
|
Status: &commonpb.Status{
|
|
|
|
ErrorCode: commonpb.ErrorCode_Success,
|
|
|
|
},
|
|
|
|
TaskId: 1,
|
|
|
|
DatanodeId: 1,
|
|
|
|
State: commonpb.ImportState_ImportStarted,
|
|
|
|
Segments: make([]int64, 0),
|
|
|
|
AutoIds: make([]int64, 0),
|
|
|
|
RowCount: 0,
|
|
|
|
}
|
|
|
|
reportFunc := func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
schema := sampleSchema()
|
2022-10-27 16:21:34 +08:00
|
|
|
wrapper := NewImportWrapper(ctx, schema, 2, 1, idAllocator, cm, importResult, reportFunc)
|
|
|
|
wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)
|
2022-09-26 18:06:54 +08:00
|
|
|
|
2022-11-07 17:11:02 +08:00
|
|
|
wrapper.reportImportAttempts = 2
|
2022-09-26 18:06:54 +08:00
|
|
|
wrapper.reportFunc = func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return errors.New("mock error")
|
|
|
|
}
|
2022-10-27 16:21:34 +08:00
|
|
|
|
|
|
|
files := createSampleNumpyFiles(t, cm)
|
|
|
|
|
|
|
|
err = wrapper.Import(files, DefaultImportOptions())
|
2022-09-26 18:06:54 +08:00
|
|
|
assert.NotNil(t, err)
|
2022-10-27 16:21:34 +08:00
|
|
|
assert.Equal(t, 5, rowCounter.rowCount)
|
2022-09-26 18:06:54 +08:00
|
|
|
assert.Equal(t, commonpb.ImportState_ImportPersisted, importResult.State)
|
|
|
|
}
|
2022-09-30 10:32:54 +08:00
|
|
|
|
|
|
|
func Test_ImportWrapperIsBinlogImport(t *testing.T) {
|
2022-11-02 10:23:35 +08:00
|
|
|
err := os.MkdirAll(TempFilesPath, os.ModePerm)
|
|
|
|
assert.Nil(t, err)
|
|
|
|
defer os.RemoveAll(TempFilesPath)
|
2022-09-30 10:32:54 +08:00
|
|
|
|
2022-11-02 10:23:35 +08:00
|
|
|
// NewDefaultFactory() use "/tmp/milvus" as default root path, and cannot specify root path
|
|
|
|
// NewChunkManagerFactory() can specify the root path
|
|
|
|
f := storage.NewChunkManagerFactory("local", storage.RootPath(TempFilesPath))
|
|
|
|
ctx := context.Background()
|
|
|
|
cm, err := f.NewPersistentStorageChunkManager(ctx)
|
|
|
|
assert.NoError(t, err)
|
2022-09-30 10:32:54 +08:00
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
idAllocator := newIDAllocator(ctx, t, nil)
|
2022-09-30 10:32:54 +08:00
|
|
|
schema := perfSchema(128)
|
|
|
|
shardNum := 2
|
|
|
|
segmentSize := 512 // unit: MB
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil)
|
2022-09-30 10:32:54 +08:00
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
// empty paths
|
2022-09-30 10:32:54 +08:00
|
|
|
paths := []string{}
|
|
|
|
b := wrapper.isBinlogImport(paths)
|
|
|
|
assert.False(t, b)
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
// paths count should be 2
|
2022-09-30 10:32:54 +08:00
|
|
|
paths = []string{
|
|
|
|
"path1",
|
|
|
|
"path2",
|
|
|
|
"path3",
|
|
|
|
}
|
|
|
|
b = wrapper.isBinlogImport(paths)
|
|
|
|
assert.False(t, b)
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
// not path
|
2022-09-30 10:32:54 +08:00
|
|
|
paths = []string{
|
|
|
|
"path1.txt",
|
|
|
|
"path2.jpg",
|
|
|
|
}
|
|
|
|
b = wrapper.isBinlogImport(paths)
|
|
|
|
assert.False(t, b)
|
|
|
|
|
2022-11-02 10:23:35 +08:00
|
|
|
// path doesn't exist
|
2022-09-30 10:32:54 +08:00
|
|
|
paths = []string{
|
2022-11-02 10:23:35 +08:00
|
|
|
"path1",
|
|
|
|
"path2",
|
|
|
|
}
|
|
|
|
|
|
|
|
b = wrapper.isBinlogImport(paths)
|
|
|
|
assert.True(t, b)
|
|
|
|
|
|
|
|
// the delta log path is empty, success
|
|
|
|
paths = []string{
|
|
|
|
"path1",
|
|
|
|
"",
|
2022-09-30 10:32:54 +08:00
|
|
|
}
|
|
|
|
b = wrapper.isBinlogImport(paths)
|
|
|
|
assert.True(t, b)
|
2022-11-02 10:23:35 +08:00
|
|
|
|
|
|
|
// path is empty string
|
|
|
|
paths = []string{
|
|
|
|
"",
|
|
|
|
"",
|
|
|
|
}
|
|
|
|
b = wrapper.isBinlogImport(paths)
|
|
|
|
assert.False(t, b)
|
2022-09-30 10:32:54 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func Test_ImportWrapperDoBinlogImport(t *testing.T) {
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
cm := &MockChunkManager{
|
|
|
|
size: 1,
|
|
|
|
}
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
idAllocator := newIDAllocator(ctx, t, nil)
|
2022-09-30 10:32:54 +08:00
|
|
|
schema := perfSchema(128)
|
|
|
|
shardNum := 2
|
|
|
|
segmentSize := 512 // unit: MB
|
|
|
|
|
2022-10-27 16:21:34 +08:00
|
|
|
wrapper := NewImportWrapper(ctx, schema, int32(shardNum), int64(segmentSize), idAllocator, cm, nil, nil)
|
2022-09-30 10:32:54 +08:00
|
|
|
paths := []string{
|
|
|
|
"/tmp",
|
|
|
|
"/tmp",
|
|
|
|
}
|
|
|
|
wrapper.chunkManager = nil
|
|
|
|
|
|
|
|
// failed to create new BinlogParser
|
2022-10-27 16:21:34 +08:00
|
|
|
err := wrapper.doBinlogImport(paths, 0, math.MaxUint64)
|
2022-09-30 10:32:54 +08:00
|
|
|
assert.NotNil(t, err)
|
|
|
|
|
|
|
|
cm.listErr = errors.New("error")
|
|
|
|
wrapper.chunkManager = cm
|
2022-10-27 16:21:34 +08:00
|
|
|
|
|
|
|
rowCounter := &rowCounterTest{}
|
|
|
|
assignSegmentFunc, flushFunc, saveSegmentFunc := createMockCallbackFunctions(t, rowCounter)
|
|
|
|
wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)
|
2022-09-30 10:32:54 +08:00
|
|
|
|
|
|
|
// failed to call parser.Parse()
|
2022-10-27 16:21:34 +08:00
|
|
|
err = wrapper.doBinlogImport(paths, 0, math.MaxUint64)
|
2022-09-30 10:32:54 +08:00
|
|
|
assert.NotNil(t, err)
|
|
|
|
|
|
|
|
// Import() failed
|
2022-10-27 16:21:34 +08:00
|
|
|
err = wrapper.Import(paths, DefaultImportOptions())
|
2022-09-30 10:32:54 +08:00
|
|
|
assert.NotNil(t, err)
|
|
|
|
|
|
|
|
cm.listErr = nil
|
|
|
|
wrapper.reportFunc = func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
wrapper.importResult = &rootcoordpb.ImportResult{
|
|
|
|
Status: &commonpb.Status{
|
|
|
|
ErrorCode: commonpb.ErrorCode_Success,
|
|
|
|
},
|
|
|
|
TaskId: 1,
|
|
|
|
DatanodeId: 1,
|
|
|
|
State: commonpb.ImportState_ImportStarted,
|
|
|
|
Segments: make([]int64, 0),
|
|
|
|
AutoIds: make([]int64, 0),
|
|
|
|
RowCount: 0,
|
|
|
|
}
|
|
|
|
|
|
|
|
// succeed
|
2022-10-27 16:21:34 +08:00
|
|
|
err = wrapper.doBinlogImport(paths, 0, math.MaxUint64)
|
2022-09-30 10:32:54 +08:00
|
|
|
assert.Nil(t, err)
|
|
|
|
}
|
2022-10-27 16:21:34 +08:00
|
|
|
|
2022-11-07 17:11:02 +08:00
|
|
|
func Test_ImportWrapperReportPersisted(t *testing.T) {
|
|
|
|
ctx := context.Background()
|
2022-12-07 14:53:18 +08:00
|
|
|
tr := timerecord.NewTimeRecorder("test")
|
2022-11-07 17:11:02 +08:00
|
|
|
|
|
|
|
importResult := &rootcoordpb.ImportResult{
|
|
|
|
Status: &commonpb.Status{
|
|
|
|
ErrorCode: commonpb.ErrorCode_Success,
|
|
|
|
},
|
|
|
|
TaskId: 1,
|
|
|
|
DatanodeId: 1,
|
|
|
|
State: commonpb.ImportState_ImportStarted,
|
|
|
|
Segments: make([]int64, 0),
|
|
|
|
AutoIds: make([]int64, 0),
|
|
|
|
RowCount: 0,
|
|
|
|
}
|
|
|
|
reportFunc := func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
wrapper := NewImportWrapper(ctx, sampleSchema(), int32(2), int64(1024), nil, nil, importResult, reportFunc)
|
|
|
|
assert.NotNil(t, wrapper)
|
|
|
|
|
|
|
|
rowCounter := &rowCounterTest{}
|
|
|
|
assignSegmentFunc, flushFunc, saveSegmentFunc := createMockCallbackFunctions(t, rowCounter)
|
|
|
|
err := wrapper.SetCallbackFunctions(assignSegmentFunc, flushFunc, saveSegmentFunc)
|
|
|
|
assert.Nil(t, err)
|
|
|
|
|
|
|
|
// success
|
2022-12-07 14:53:18 +08:00
|
|
|
err = wrapper.reportPersisted(2, tr)
|
2022-11-07 17:11:02 +08:00
|
|
|
assert.Nil(t, err)
|
2022-12-07 14:53:18 +08:00
|
|
|
assert.NotEmpty(t, wrapper.importResult.GetInfos())
|
2022-11-07 17:11:02 +08:00
|
|
|
|
|
|
|
// error when closing segments
|
|
|
|
wrapper.saveSegmentFunc = func(fieldsInsert []*datapb.FieldBinlog, fieldsStats []*datapb.FieldBinlog, segmentID int64, targetChName string, rowCount int64) error {
|
|
|
|
return errors.New("error")
|
|
|
|
}
|
|
|
|
wrapper.workingSegments[0] = &WorkingSegment{}
|
2022-12-07 14:53:18 +08:00
|
|
|
err = wrapper.reportPersisted(2, tr)
|
2022-11-07 17:11:02 +08:00
|
|
|
assert.Error(t, err)
|
|
|
|
|
|
|
|
// failed to report
|
|
|
|
wrapper.saveSegmentFunc = func(fieldsInsert []*datapb.FieldBinlog, fieldsStats []*datapb.FieldBinlog, segmentID int64, targetChName string, rowCount int64) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
wrapper.reportFunc = func(res *rootcoordpb.ImportResult) error {
|
|
|
|
return errors.New("error")
|
|
|
|
}
|
2022-12-07 14:53:18 +08:00
|
|
|
err = wrapper.reportPersisted(2, tr)
|
2022-11-07 17:11:02 +08:00
|
|
|
assert.Error(t, err)
|
|
|
|
}
|
2023-01-11 17:37:44 +08:00
|
|
|
|
|
|
|
func Test_ImportWrapperUpdateProgressPercent(t *testing.T) {
|
|
|
|
ctx := context.Background()
|
|
|
|
|
|
|
|
wrapper := NewImportWrapper(ctx, sampleSchema(), 2, 1, nil, nil, nil, nil)
|
|
|
|
assert.NotNil(t, wrapper)
|
|
|
|
assert.Equal(t, int64(0), wrapper.progressPercent)
|
|
|
|
|
|
|
|
wrapper.updateProgressPercent(5)
|
|
|
|
assert.Equal(t, int64(5), wrapper.progressPercent)
|
|
|
|
|
|
|
|
wrapper.updateProgressPercent(200)
|
|
|
|
assert.Equal(t, int64(5), wrapper.progressPercent)
|
|
|
|
|
|
|
|
wrapper.updateProgressPercent(100)
|
|
|
|
assert.Equal(t, int64(100), wrapper.progressPercent)
|
|
|
|
}
|