milvus/internal/util/importutil/json_handler_test.go
SimFG a55f739608
Separate public proto files (#19782)
Signed-off-by: SimFG <bang.fu@zilliz.com>

Signed-off-by: SimFG <bang.fu@zilliz.com>
2022-10-16 20:49:27 +08:00

572 lines
18 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package importutil
import (
"context"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/commonpb"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/common"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
"github.com/milvus-io/milvus/internal/storage"
)
type mockIDAllocator struct {
}
func (tso *mockIDAllocator) AllocID(ctx context.Context, req *rootcoordpb.AllocIDRequest) (*rootcoordpb.AllocIDResponse, error) {
return &rootcoordpb.AllocIDResponse{
Status: &commonpb.Status{
ErrorCode: commonpb.ErrorCode_Success,
Reason: "",
},
ID: int64(1),
Count: req.Count,
}, nil
}
func newIDAllocator(ctx context.Context, t *testing.T) *allocator.IDAllocator {
mockIDAllocator := &mockIDAllocator{}
idAllocator, err := allocator.NewIDAllocator(ctx, mockIDAllocator, int64(1))
assert.Nil(t, err)
err = idAllocator.Start()
assert.Nil(t, err)
return idAllocator
}
func Test_GetFieldDimension(t *testing.T) {
schema := &schemapb.FieldSchema{
FieldID: 111,
Name: "field_float_vector",
IsPrimaryKey: false,
Description: "float_vector",
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "4"},
},
}
dim, err := getFieldDimension(schema)
assert.Nil(t, err)
assert.Equal(t, 4, dim)
schema.TypeParams = []*commonpb.KeyValuePair{
{Key: "dim", Value: "abc"},
}
dim, err = getFieldDimension(schema)
assert.NotNil(t, err)
assert.Equal(t, 0, dim)
schema.TypeParams = []*commonpb.KeyValuePair{}
dim, err = getFieldDimension(schema)
assert.NotNil(t, err)
assert.Equal(t, 0, dim)
}
func Test_InitValidators(t *testing.T) {
validators := make(map[storage.FieldID]*Validator)
err := initValidators(nil, validators)
assert.NotNil(t, err)
schema := sampleSchema()
// success case
err = initValidators(schema, validators)
assert.Nil(t, err)
assert.Equal(t, len(schema.Fields), len(validators))
name2ID := make(map[string]storage.FieldID)
for _, field := range schema.Fields {
name2ID[field.GetName()] = field.GetFieldID()
}
checkFunc := func(funcName string, validVal interface{}, invalidVal interface{}) {
id := name2ID[funcName]
v, ok := validators[id]
assert.True(t, ok)
err = v.validateFunc(validVal)
assert.Nil(t, err)
err = v.validateFunc(invalidVal)
assert.NotNil(t, err)
}
// validate functions
var validVal interface{} = true
var invalidVal interface{} = "aa"
checkFunc("field_bool", validVal, invalidVal)
validVal = float64(100)
invalidVal = "aa"
checkFunc("field_int8", validVal, invalidVal)
checkFunc("field_int16", validVal, invalidVal)
checkFunc("field_int32", validVal, invalidVal)
checkFunc("field_int64", validVal, invalidVal)
checkFunc("field_float", validVal, invalidVal)
checkFunc("field_double", validVal, invalidVal)
validVal = "aa"
invalidVal = 100
checkFunc("field_string", validVal, invalidVal)
validVal = []interface{}{float64(100), float64(101)}
invalidVal = "aa"
checkFunc("field_binary_vector", validVal, invalidVal)
invalidVal = []interface{}{float64(100)}
checkFunc("field_binary_vector", validVal, invalidVal)
invalidVal = []interface{}{float64(100), float64(101), float64(102)}
checkFunc("field_binary_vector", validVal, invalidVal)
invalidVal = []interface{}{true, true}
checkFunc("field_binary_vector", validVal, invalidVal)
invalidVal = []interface{}{float64(255), float64(-1)}
checkFunc("field_binary_vector", validVal, invalidVal)
validVal = []interface{}{float64(1), float64(2), float64(3), float64(4)}
invalidVal = true
checkFunc("field_float_vector", validVal, invalidVal)
invalidVal = []interface{}{float64(1), float64(2), float64(3)}
checkFunc("field_float_vector", validVal, invalidVal)
invalidVal = []interface{}{float64(1), float64(2), float64(3), float64(4), float64(5)}
checkFunc("field_float_vector", validVal, invalidVal)
invalidVal = []interface{}{"a", "b", "c", "d"}
checkFunc("field_float_vector", validVal, invalidVal)
// error cases
schema = &schemapb.CollectionSchema{
Name: "schema",
Description: "schema",
AutoID: true,
Fields: make([]*schemapb.FieldSchema, 0),
}
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
FieldID: 111,
Name: "field_float_vector",
IsPrimaryKey: false,
Description: "float_vector",
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "aa"},
},
})
validators = make(map[storage.FieldID]*Validator)
err = initValidators(schema, validators)
assert.NotNil(t, err)
schema.Fields = make([]*schemapb.FieldSchema, 0)
schema.Fields = append(schema.Fields, &schemapb.FieldSchema{
FieldID: 110,
Name: "field_binary_vector",
IsPrimaryKey: false,
Description: "float_vector",
DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: "dim", Value: "aa"},
},
})
err = initValidators(schema, validators)
assert.NotNil(t, err)
}
func Test_JSONRowValidator(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
schema := sampleSchema()
parser := NewJSONParser(ctx, schema)
assert.NotNil(t, parser)
// 0 row case
reader := strings.NewReader(`{
"rows":[]
}`)
validator, err := NewJSONRowValidator(schema, nil)
assert.NotNil(t, validator)
assert.Nil(t, err)
err = parser.ParseRows(reader, validator)
assert.NotNil(t, err)
assert.Equal(t, int64(0), validator.ValidateCount())
// // missed some fields
// reader = strings.NewReader(`{
// "rows":[
// {"field_bool": true, "field_int8": 10, "field_int16": 101, "field_int32": 1001, "field_int64": 10001, "field_float": 3.14, "field_double": 1.56, "field_string": "hello world", "field_binary_vector": [254, 0], "field_float_vector": [1.1, 1.2, 1.3, 1.4]},
// {"field_bool": true, "field_int8": 10, "field_int16": 101, "field_int64": 10001, "field_float": 3.14, "field_double": 1.56, "field_string": "hello world", "field_binary_vector": [254, 0], "field_float_vector": [1.1, 1.2, 1.3, 1.4]}
// ]
// }`)
// err = parser.ParseRows(reader, validator)
// assert.NotNil(t, err)
// invalid dimension
reader = strings.NewReader(`{
"rows":[
{"field_bool": true, "field_int8": true, "field_int16": 101, "field_int32": 1001, "field_int64": 10001, "field_float": 3.14, "field_double": 1.56, "field_string": "hello world", "field_binary_vector": [254, 0, 1, 66, 128, 0, 1, 66], "field_float_vector": [1.1, 1.2, 1.3, 1.4]}
]
}`)
err = parser.ParseRows(reader, validator)
assert.NotNil(t, err)
// invalid value type
reader = strings.NewReader(`{
"rows":[
{"field_bool": true, "field_int8": true, "field_int16": 101, "field_int32": 1001, "field_int64": 10001, "field_float": 3.14, "field_double": 1.56, "field_string": "hello world", "field_binary_vector": [254, 0], "field_float_vector": [1.1, 1.2, 1.3, 1.4]}
]
}`)
err = parser.ParseRows(reader, validator)
assert.NotNil(t, err)
// init failed
validator.validators = nil
err = validator.Handle(nil)
assert.NotNil(t, err)
}
func Test_JSONColumnValidator(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
schema := sampleSchema()
parser := NewJSONParser(ctx, schema)
assert.NotNil(t, parser)
// 0 row case
reader := strings.NewReader(`{
"field_bool": [],
"field_int8": [],
"field_int16": [],
"field_int32": [],
"field_int64": [],
"field_float": [],
"field_double": [],
"field_string": [],
"field_binary_vector": [],
"field_float_vector": []
}`)
validator, err := NewJSONColumnValidator(schema, nil)
assert.NotNil(t, validator)
assert.Nil(t, err)
err = parser.ParseColumns(reader, validator)
assert.NotNil(t, err)
for _, count := range validator.rowCounter {
assert.Equal(t, int64(0), count)
}
// different row count
reader = strings.NewReader(`{
"field_bool": [true],
"field_int8": [],
"field_int16": [],
"field_int32": [1, 2, 3],
"field_int64": [],
"field_float": [],
"field_double": [],
"field_string": [],
"field_binary_vector": [],
"field_float_vector": []
}`)
validator, err = NewJSONColumnValidator(schema, nil)
assert.NotNil(t, validator)
assert.Nil(t, err)
err = parser.ParseColumns(reader, validator)
assert.NotNil(t, err)
// invalid value type
reader = strings.NewReader(`{
"dummy": [],
"field_bool": [true],
"field_int8": [1],
"field_int16": [2],
"field_int32": [3],
"field_int64": [4],
"field_float": [1],
"field_double": [1],
"field_string": [9],
"field_binary_vector": [[254, 1]],
"field_float_vector": [[1.1, 1.2, 1.3, 1.4]]
}`)
validator, err = NewJSONColumnValidator(schema, nil)
assert.NotNil(t, validator)
assert.Nil(t, err)
err = parser.ParseColumns(reader, validator)
assert.NotNil(t, err)
// init failed
validator.validators = nil
err = validator.Handle(nil)
assert.NotNil(t, err)
}
func Test_JSONRowConsumer(t *testing.T) {
ctx := context.Background()
idAllocator := newIDAllocator(ctx, t)
schema := sampleSchema()
parser := NewJSONParser(ctx, schema)
assert.NotNil(t, parser)
reader := strings.NewReader(`{
"rows":[
{"field_bool": true, "field_int8": 10, "field_int16": 101, "field_int32": 1001, "field_int64": 10001, "field_float": 3.14, "field_double": 1.56, "field_string": "hello world", "field_binary_vector": [254, 0], "field_float_vector": [1.1, 1.2, 1.3, 1.4]},
{"field_bool": false, "field_int8": 11, "field_int16": 102, "field_int32": 1002, "field_int64": 10002, "field_float": 3.15, "field_double": 2.56, "field_string": "hello world", "field_binary_vector": [253, 0], "field_float_vector": [2.1, 2.2, 2.3, 2.4]},
{"field_bool": true, "field_int8": 12, "field_int16": 103, "field_int32": 1003, "field_int64": 10003, "field_float": 3.16, "field_double": 3.56, "field_string": "hello world", "field_binary_vector": [252, 0], "field_float_vector": [3.1, 3.2, 3.3, 3.4]},
{"field_bool": false, "field_int8": 13, "field_int16": 104, "field_int32": 1004, "field_int64": 10004, "field_float": 3.17, "field_double": 4.56, "field_string": "hello world", "field_binary_vector": [251, 0], "field_float_vector": [4.1, 4.2, 4.3, 4.4]},
{"field_bool": true, "field_int8": 14, "field_int16": 105, "field_int32": 1005, "field_int64": 10005, "field_float": 3.18, "field_double": 5.56, "field_string": "hello world", "field_binary_vector": [250, 0], "field_float_vector": [5.1, 5.2, 5.3, 5.4]}
]
}`)
var shardNum int32 = 2
var callTime int32
var totalCount int
consumeFunc := func(fields map[storage.FieldID]storage.FieldData, shard int) error {
assert.Equal(t, int(callTime), shard)
callTime++
rowCount := 0
for _, data := range fields {
if rowCount == 0 {
rowCount = data.RowNum()
} else {
assert.Equal(t, rowCount, data.RowNum())
}
}
totalCount += rowCount
return nil
}
consumer, err := NewJSONRowConsumer(schema, idAllocator, shardNum, 1, consumeFunc)
assert.NotNil(t, consumer)
assert.Nil(t, err)
validator, err := NewJSONRowValidator(schema, consumer)
assert.NotNil(t, validator)
assert.Nil(t, err)
err = parser.ParseRows(reader, validator)
assert.Nil(t, err)
assert.Equal(t, int64(5), validator.ValidateCount())
assert.Equal(t, shardNum, callTime)
assert.Equal(t, 5, totalCount)
}
func Test_JSONRowConsumerStringKey(t *testing.T) {
ctx := context.Background()
idAllocator := newIDAllocator(ctx, t)
schema := strKeySchema()
parser := NewJSONParser(ctx, schema)
assert.NotNil(t, parser)
reader := strings.NewReader(`{
"rows": [{
"uid": "Dm4aWrbNzhmjwCTEnCJ9LDPO2N09sqysxgVfbH9Zmn3nBzmwsmk0eZN6x7wSAoPQ",
"int_scalar": 9070353,
"float_scalar": 0.9798043638085004,
"string_scalar": "ShQ44OX0z8kGpRPhaXmfSsdH7JHq5DsZzu0e2umS1hrWG0uONH2RIIAdOECaaXir",
"bool_scalar": true,
"vectors": [0.5040062902126952, 0.8297619818664708, 0.20248342801564806, 0.12834786423659314]
},
{
"uid": "RP50U0d2napRjXu94a8oGikWgklvVsXFurp8RR4tHGw7N0gk1b7opm59k3FCpyPb",
"int_scalar": 8505288,
"float_scalar": 0.937913432198687,
"string_scalar": "Ld4b0avxathBdNvCrtm3QsWO1pYktUVR7WgAtrtozIwrA8vpeactNhJ85CFGQnK5",
"bool_scalar": false,
"vectors": [0.528232122836893, 0.6916116750653186, 0.41443762522548705, 0.26624344144792056]
},
{
"uid": "oxhFkQitWPPw0Bjmj7UQcn4iwvS0CU7RLAC81uQFFQjWtOdiB329CPyWkfGSeYfE",
"int_scalar": 4392660,
"float_scalar": 0.32381232630490264,
"string_scalar": "EmAlB0xdQcxeBtwlZJQnLgKodiuRinynoQtg0eXrjkq24dQohzSm7Bx3zquHd3kO",
"bool_scalar": false,
"vectors": [0.7978693027281338, 0.12394906726785092, 0.42431962903815285, 0.4098707807351914]
},
{
"uid": "sxoEL4Mpk1LdsyXhbNm059UWJ3CvxURLCQczaVI5xtBD4QcVWTDFUW7dBdye6nbn",
"int_scalar": 7927425,
"float_scalar": 0.31074026464844895,
"string_scalar": "fdY2beCvs1wSws0Gb9ySD92xwfEfJpX5DQgsWoISylBAoYOcXpRaqIJoXYS4g269",
"bool_scalar": true,
"vectors": [0.3716157812069954, 0.006981281113265229, 0.9007003458552365, 0.22492634316191004]
},
{
"uid": "g33Rqq2UQSHPRHw5FvuXxf5uGEhIAetxE6UuXXCJj0hafG8WuJr1ueZftsySCqAd",
"int_scalar": 9288807,
"float_scalar": 0.4953578200336135,
"string_scalar": "6f8Iv1zQAGksj5XxMbbI5evTrYrB8fSFQ58jl0oU7Z4BpA81VsD2tlWqkhfoBNa7",
"bool_scalar": false,
"vectors": [0.5921374209648096, 0.04234832587925662, 0.7803878096531548, 0.1964045837884633]
},
{
"uid": "ACIJd7lTXkRgUNmlQk6AbnWIKEEV8Z6OS3vDcm0w9psmt9sH3z1JLg1fNVCqiX3d",
"int_scalar": 1173595,
"float_scalar": 0.9000745450802002,
"string_scalar": "gpj9YctF2ig1l1APkvRzHbVE8PZVKRbk7nvW73qS2uQbY5l7MeIeTPwRBjasbY8z",
"bool_scalar": true,
"vectors": [0.4655121736168688, 0.6195496905333787, 0.5316616196326639, 0.3417492053890768]
},
{
"uid": "f0wRVZZ9u1bEKrAjLeZj3oliEnUjBiUl6TiermeczceBmGe6M2RHONgz3qEogrd5",
"int_scalar": 3722368,
"float_scalar": 0.7212299175768438,
"string_scalar": "xydiejGUlvS49BfBuy1EuYRKt3v2oKwC6pqy7Ga4dGWn3BnQigV4XAGawixDAGHN",
"bool_scalar": false,
"vectors": [0.6173164237304075, 0.374107748459483, 0.3686321416317251, 0.585725336391797]
},
{
"uid": "uXq9q96vUqnDebcUISFkRFT27OjD89DWhok6urXIjTuLzaSWnCVTJkrJXxFctSg0",
"int_scalar": 1940731,
"float_scalar": 0.9524404085944204,
"string_scalar": "ZXSNzR5V3t62fjop7b7DHK56ByAF0INYwycKsu6OxGP4p2j0Obs6l0NUqukypGXd",
"bool_scalar": false,
"vectors": [0.07178869784465443, 0.4208459174227864, 0.5882811425075762, 0.6867753592116734]
},
{
"uid": "EXDDklLvQIfeCJN8cES3b9mdCYDQVhq2iLj8WWA3TPtZ1SZ4Jpidj7OXJidSD7Wn",
"int_scalar": 2158426,
"float_scalar": 0.23770219927963454,
"string_scalar": "9TNeKVSMqTP8Zxs90kaAcB7n6JbIcvFWInzi9JxZQgmYxD5xLYwaCoeUzRiNAxAg",
"bool_scalar": false,
"vectors": [0.5659468293534021, 0.6275816433340369, 0.3978846871291008, 0.3571179679645908]
},
{
"uid": "mlaXOgYvB88WWRpXNyWv6UqpmvIHrC6pRo03AtaPLMpVymu0L9ioO8GWa1XgGyj0",
"int_scalar": 198279,
"float_scalar": 0.020343767010139513,
"string_scalar": "AblYGRZJiMAwDbMEkungG0yKTeuya4FgyliakWWqSOJ5TvQWB9Ki2WXbnvSsYIDF",
"bool_scalar": true,
"vectors": [0.5374636140212398, 0.7655373567912009, 0.05491796821609715, 0.349384366747262]
}
]
}`)
var shardNum int32 = 2
var callTime int32
var totalCount int
consumeFunc := func(fields map[storage.FieldID]storage.FieldData, shard int) error {
assert.Equal(t, int(callTime), shard)
callTime++
rowCount := 0
for _, data := range fields {
if rowCount == 0 {
rowCount = data.RowNum()
} else {
assert.Equal(t, rowCount, data.RowNum())
}
}
totalCount += rowCount
return nil
}
consumer, err := NewJSONRowConsumer(schema, idAllocator, shardNum, 1, consumeFunc)
assert.NotNil(t, consumer)
assert.Nil(t, err)
validator, err := NewJSONRowValidator(schema, consumer)
assert.NotNil(t, validator)
assert.Nil(t, err)
err = parser.ParseRows(reader, validator)
assert.Nil(t, err)
assert.Equal(t, int64(10), validator.ValidateCount())
assert.Equal(t, shardNum, callTime)
assert.Equal(t, 10, totalCount)
}
func Test_JSONColumnConsumer(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
schema := sampleSchema()
parser := NewJSONParser(ctx, schema)
assert.NotNil(t, parser)
reader := strings.NewReader(`{
"field_bool": [true, false, true, true, true],
"field_int8": [10, 11, 12, 13, 14],
"field_int16": [100, 101, 102, 103, 104],
"field_int32": [1000, 1001, 1002, 1003, 1004],
"field_int64": [10000, 10001, 10002, 10003, 10004],
"field_float": [3.14, 3.15, 3.16, 3.17, 3.18],
"field_double": [5.1, 5.2, 5.3, 5.4, 5.5],
"field_string": ["a", "b", "c", "d", "e"],
"field_binary_vector": [
[254, 1],
[253, 2],
[252, 3],
[251, 4],
[250, 5]
],
"field_float_vector": [
[1.1, 1.2, 1.3, 1.4],
[2.1, 2.2, 2.3, 2.4],
[3.1, 3.2, 3.3, 3.4],
[4.1, 4.2, 4.3, 4.4],
[5.1, 5.2, 5.3, 5.4]
]
}`)
callTime := 0
rowCount := 0
consumeFunc := func(fields map[storage.FieldID]storage.FieldData) error {
callTime++
for id, data := range fields {
if id == common.RowIDField {
continue
}
if rowCount == 0 {
rowCount = data.RowNum()
} else {
assert.Equal(t, rowCount, data.RowNum())
}
}
return nil
}
consumer, err := NewJSONColumnConsumer(schema, consumeFunc)
assert.NotNil(t, consumer)
assert.Nil(t, err)
validator, err := NewJSONColumnValidator(schema, consumer)
assert.NotNil(t, validator)
assert.Nil(t, err)
err = parser.ParseColumns(reader, validator)
assert.Nil(t, err)
for _, count := range validator.ValidateCount() {
assert.Equal(t, int64(5), count)
}
assert.Equal(t, 1, callTime)
assert.Equal(t, 5, rowCount)
}