milvus/internal/util/importutil/json_handler_test.go
groot 8da44638ac
Refine json parser for import (#20545)
Signed-off-by: yhmo <yihua.mo@zilliz.com>

Signed-off-by: yhmo <yihua.mo@zilliz.com>
2022-11-16 19:05:08 +08:00

302 lines
8.3 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package importutil
import (
"context"
"errors"
"testing"
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/commonpb"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/allocator"
"github.com/milvus-io/milvus/internal/proto/rootcoordpb"
"github.com/milvus-io/milvus/internal/storage"
)
// mockIDAllocator is the fake allocation backend handed to
// allocator.NewIDAllocator by newIDAllocator. It lets a test decide whether
// AllocID calls succeed or fail.
type mockIDAllocator struct {
	// allocErr is returned verbatim as the error result of every AllocID call;
	// leave it nil for a backend that never fails.
	allocErr error
}
// AllocID fakes the ID-allocation call. The response always carries a
// success status, ID 1 and the Count copied from the request; the error
// result is whatever allocErr the mock was configured with (nil when no
// failure is injected). ctx is ignored.
func (a *mockIDAllocator) AllocID(ctx context.Context, req *rootcoordpb.AllocIDRequest) (*rootcoordpb.AllocIDResponse, error) {
	okStatus := &commonpb.Status{
		ErrorCode: commonpb.ErrorCode_Success,
		Reason:    "",
	}
	resp := &rootcoordpb.AllocIDResponse{
		Status: okStatus,
		ID:     int64(1),
		Count:  req.Count,
	}
	// The response is populated even when an error is injected.
	return resp, a.allocErr
}
// newIDAllocator creates and starts an allocator.IDAllocator whose backend is
// a mockIDAllocator. allocErr is the error every AllocID call on that backend
// returns; pass nil for a backend that never fails.
//
// The calling test is aborted immediately if the allocator cannot be created
// or started, so the returned allocator is always one that started
// successfully.
func newIDAllocator(ctx context.Context, t *testing.T, allocErr error) *allocator.IDAllocator {
	// Report failures at the caller's line rather than inside this helper.
	t.Helper()

	// Named "backend" so the local does not shadow the mockIDAllocator type.
	backend := &mockIDAllocator{
		allocErr: allocErr,
	}

	idAllocator, err := allocator.NewIDAllocator(ctx, backend, int64(1))
	if !assert.Nil(t, err) {
		// Do not go on to call Start on an allocator that failed to build.
		t.FailNow()
	}

	err = idAllocator.Start()
	if !assert.Nil(t, err) {
		t.FailNow()
	}
	return idAllocator
}
func Test_NewJSONRowConsumer(t *testing.T) {
// nil schema
consumer, err := NewJSONRowConsumer(nil, nil, 2, 16, nil)
assert.NotNil(t, err)
assert.Nil(t, consumer)
// wrong schema
schema := &schemapb.CollectionSchema{
Name: "schema",
AutoID: true,
Fields: []*schemapb.FieldSchema{
{
FieldID: 101,
Name: "uid",
IsPrimaryKey: true,
AutoID: false,
DataType: schemapb.DataType_None,
},
},
}
consumer, err = NewJSONRowConsumer(schema, nil, 2, 16, nil)
assert.NotNil(t, err)
assert.Nil(t, consumer)
// no primary key
schema.Fields[0].IsPrimaryKey = false
schema.Fields[0].DataType = schemapb.DataType_Int64
consumer, err = NewJSONRowConsumer(schema, nil, 2, 16, nil)
assert.NotNil(t, err)
assert.Nil(t, consumer)
// primary key is autoid, but no IDAllocator
schema.Fields[0].IsPrimaryKey = true
schema.Fields[0].AutoID = true
consumer, err = NewJSONRowConsumer(schema, nil, 2, 16, nil)
assert.NotNil(t, err)
assert.Nil(t, consumer)
// success
consumer, err = NewJSONRowConsumer(sampleSchema(), nil, 2, 16, nil)
assert.NotNil(t, consumer)
assert.Nil(t, err)
}
// Test_JSONRowConsumerFlush fills the consumer's per-shard buffers by hand and
// asserts that a forced flush calls the flush callback once for every shard
// that holds rows, passing along the expected total number of rows.
func Test_JSONRowConsumerFlush(t *testing.T) {
	const pkFieldID = 101

	var (
		flushCalls  int32
		flushedRows int
	)
	// onFlush counts invocations and accumulates the primary-key row count.
	onFlush := func(fields map[storage.FieldID]storage.FieldData, shard int) error {
		flushCalls++
		pk, found := fields[pkFieldID]
		assert.True(t, found)
		assert.Greater(t, pk.RowNum(), 0)
		flushedRows += pk.RowNum()
		return nil
	}

	schema := &schemapb.CollectionSchema{
		Name:   "schema",
		AutoID: true,
		Fields: []*schemapb.FieldSchema{
			{
				FieldID:      pkFieldID,
				Name:         "uid",
				IsPrimaryKey: true,
				AutoID:       false,
				DataType:     schemapb.DataType_Int64,
			},
		},
	}

	var (
		shardNum  int32 = 4
		blockSize int64 = 1
	)
	consumer, err := NewJSONRowConsumer(schema, nil, shardNum, blockSize, onFlush)
	assert.NotNil(t, consumer)
	assert.Nil(t, err)

	rowCountEachShard := 100
	// fillShard appends keys 0..rowCountEachShard-1 to the shard's primary-key
	// column and sets its NumRows accordingly.
	fillShard := func(shard int) {
		pk := consumer.segmentsData[shard][pkFieldID].(*storage.Int64FieldData)
		for id := int64(0); id < int64(rowCountEachShard); id++ {
			pk.Data = append(pk.Data, id)
		}
		pk.NumRows = []int64{int64(rowCountEachShard)}
	}

	// force flush: every shard holds rows
	for shard := 0; shard < int(shardNum); shard++ {
		fillShard(shard)
	}
	err = consumer.flush(true)
	assert.Nil(t, err)
	assert.Equal(t, shardNum, flushCalls)
	assert.Equal(t, rowCountEachShard*int(shardNum), flushedRows)
	assert.Equal(t, 0, len(consumer.IDRange())) // not auto-generated id, no id range

	// exceed block size trigger flush: all shards are reset and only the
	// odd-numbered ones are refilled.
	// NOTE(review): the call below still passes force=true, so this phase may
	// not exercise the block-size path on its own — confirm whether
	// flush(false) was intended.
	flushCalls, flushedRows = 0, 0
	for shard := 0; shard < int(shardNum); shard++ {
		consumer.segmentsData[shard] = initSegmentData(schema)
		if shard%2 != 0 {
			fillShard(shard)
		}
	}
	err = consumer.flush(true)
	assert.Nil(t, err)
	assert.Equal(t, shardNum/2, flushCalls)
	assert.Equal(t, rowCountEachShard*int(shardNum)/2, flushedRows)
	assert.Equal(t, 0, len(consumer.IDRange())) // not auto-generated id, no id range
}
// Test_JSONRowConsumerHandle drives JSONRowConsumer.Handle through its error
// and success paths, first with an auto-generated int64 primary key and then
// with a varchar primary key. The flush callback used throughout always
// returns an error.
func Test_JSONRowConsumerHandle(t *testing.T) {
	const pkFieldID = 101

	ctx := context.Background()
	// Allocator whose mock backend returns an error from every AllocID call.
	idAllocator := newIDAllocator(ctx, t, errors.New("error"))

	var callTime int32
	flushFunc := func(fields map[storage.FieldID]storage.FieldData, shard int) error {
		callTime++
		return errors.New("dummy error")
	}

	// buildRows returns n input rows; row i maps the primary-key field to
	// valueAt(i).
	buildRows := func(n int, valueAt func(i int) interface{}) []map[storage.FieldID]interface{} {
		rows := make([]map[storage.FieldID]interface{}, n)
		for i := range rows {
			rows[i] = map[storage.FieldID]interface{}{pkFieldID: valueAt(i)}
		}
		return rows
	}

	// Handle on a nil consumer must report an error.
	var nilConsumer *JSONRowConsumer
	err := nilConsumer.Handle(nil)
	assert.Error(t, err)

	t.Run("handle int64 pk", func(t *testing.T) {
		int64Schema := &schemapb.CollectionSchema{
			Name:   "schema",
			AutoID: true,
			Fields: []*schemapb.FieldSchema{
				{
					FieldID:      pkFieldID,
					Name:         "uid",
					IsPrimaryKey: true,
					AutoID:       true,
					DataType:     schemapb.DataType_Int64,
				},
			},
		}

		consumer, err := NewJSONRowConsumer(int64Schema, idAllocator, 1, 1, flushFunc)
		assert.NotNil(t, consumer)
		assert.Nil(t, err)

		// appendPKs appends keys 0..n-1 to shard 0's primary-key column and
		// sets its NumRows to n. The column is looked up on every call
		// because the consumer may have replaced the shard's buffers.
		appendPKs := func(n int) {
			pk := consumer.segmentsData[0][pkFieldID].(*storage.Int64FieldData)
			for id := 0; id < n; id++ {
				pk.Data = append(pk.Data, int64(id))
			}
			pk.NumRows = []int64{int64(n)}
		}

		// nil input will trigger flush
		appendPKs(10)
		err = consumer.Handle(nil)
		assert.NotNil(t, err)
		assert.Equal(t, int32(1), callTime)

		// optional flush
		callTime = 0
		rowCount := 100
		appendPKs(rowCount)
		input := buildRows(rowCount, func(i int) interface{} { return int64(i) })
		err = consumer.Handle(input)
		assert.NotNil(t, err)
		assert.Equal(t, int32(1), callTime)

		// failed to auto-generate pk
		consumer.blockSize = 1024 * 1024
		err = consumer.Handle(input)
		assert.NotNil(t, err)

		// hash int64 pk, now with an allocator whose backend does not fail
		consumer.rowIDAllocator = newIDAllocator(ctx, t, nil)
		err = consumer.Handle(input)
		assert.Nil(t, err)
		assert.Equal(t, int64(rowCount), consumer.rowCounter)
		assert.Equal(t, 2, len(consumer.autoIDRange))
		assert.Equal(t, int64(1), consumer.autoIDRange[0])
		assert.Equal(t, int64(1+rowCount), consumer.autoIDRange[1])

		// pk is auto-generated but IDAllocator is nil
		consumer.rowIDAllocator = nil
		err = consumer.Handle(input)
		assert.Error(t, err)
	})

	t.Run("handle varchar pk", func(t *testing.T) {
		varcharSchema := &schemapb.CollectionSchema{
			Name:   "schema",
			AutoID: true,
			Fields: []*schemapb.FieldSchema{
				{
					FieldID:      pkFieldID,
					Name:         "uid",
					IsPrimaryKey: true,
					AutoID:       true,
					DataType:     schemapb.DataType_VarChar,
					TypeParams: []*commonpb.KeyValuePair{
						{Key: "max_length", Value: "1024"},
					},
				},
			},
		}

		workingAllocator := newIDAllocator(ctx, t, nil)
		consumer, err := NewJSONRowConsumer(varcharSchema, workingAllocator, 1, 1024*1024, flushFunc)
		assert.NotNil(t, consumer)
		assert.Nil(t, err)

		rowCount := 100
		input := buildRows(rowCount, func(int) interface{} { return "abc" })

		// varchar pk cannot be autoid
		err = consumer.Handle(input)
		assert.NotNil(t, err)

		// hash varchar pk
		varcharSchema.Fields[0].AutoID = false
		consumer, err = NewJSONRowConsumer(varcharSchema, workingAllocator, 1, 1024*1024, flushFunc)
		assert.NotNil(t, consumer)
		assert.Nil(t, err)

		err = consumer.Handle(input)
		assert.Nil(t, err)
		assert.Equal(t, int64(rowCount), consumer.RowCount())
		assert.Equal(t, 0, len(consumer.autoIDRange))
	})
}