milvus/internal/util/importutil/json_parser_test.go
groot 9f8095996b
Report bulkinsert progress (#21612) (#21638)
Signed-off-by: groot <yihua.mo@zilliz.com>
2023-01-11 17:37:44 +08:00

384 lines
12 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package importutil
import (
"context"
"encoding/json"
"errors"
"math"
"strconv"
"strings"
"testing"
"github.com/milvus-io/milvus-proto/go-api/commonpb"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/stretchr/testify/assert"
)
// mockJSONRowConsumer is a mock JSON row consumer used by the parser tests.
// It records every batch of rows passed to Handle and can be configured to
// fail on demand.
type mockJSONRowConsumer struct {
// handleErr, when non-nil, is returned by every Handle call.
handleErr error
// rows accumulates all rows from the non-nil batches passed to Handle.
rows []map[storage.FieldID]interface{}
// handleCount is the number of Handle calls that completed without
// returning handleErr.
handleCount int
}
// Handle records one batch of parsed rows.
//
// If an error has been injected through handleErr it is returned immediately
// and nothing is recorded. Otherwise the call is counted and, when rows is
// non-nil, its elements are appended to the accumulated rows.
func (v *mockJSONRowConsumer) Handle(rows []map[storage.FieldID]interface{}) error {
	if injected := v.handleErr; injected != nil {
		return injected
	}

	v.handleCount++
	if rows == nil {
		// A nil batch is still counted but carries no rows to keep.
		return nil
	}
	v.rows = append(v.rows, rows...)
	return nil
}
func Test_AdjustBufSize(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
// small row
schema := sampleSchema()
parser := NewJSONParser(ctx, schema, nil)
assert.NotNil(t, parser)
assert.Greater(t, parser.bufRowCount, 0)
// huge row
schema.Fields[9].TypeParams = []*commonpb.KeyValuePair{
{Key: "dim", Value: "32768"},
}
parser = NewJSONParser(ctx, schema, nil)
assert.NotNil(t, parser)
assert.Greater(t, parser.bufRowCount, 0)
// no change
schema = &schemapb.CollectionSchema{
Name: "schema",
Description: "schema",
AutoID: true,
Fields: []*schemapb.FieldSchema{},
}
parser = NewJSONParser(ctx, schema, nil)
assert.NotNil(t, parser)
assert.Greater(t, parser.bufRowCount, 0)
}
// Test_JSONParserParseRows_IntPK exercises JSONParser.ParseRows with the
// sample schema.
//
// The "parse success" subtest feeds ten generated rows through the parser and
// checks every field of every row the consumer received. The "error cases"
// subtest feeds malformed or incomplete inputs and expects an error each time.
//
// The two subtests share the parser, the consumer and the reader, and the
// second one cancels the context at its end, so they must run in this order.
func Test_JSONParserParseRows_IntPK(t *testing.T) {
	ctx, cancel := context.WithCancel(context.Background())
	defer cancel()

	schema := sampleSchema()
	parser := NewJSONParser(ctx, schema, nil)
	assert.NotNil(t, parser)

	// prepare test data
	content := &sampleContent{
		Rows: make([]sampleRow, 0),
	}
	for i := 0; i < 10; i++ {
		row := sampleRow{
			FieldBool:         i%2 == 0,
			FieldInt8:         int8(i % math.MaxInt8),
			FieldInt16:        int16(100 + i),
			FieldInt32:        int32(1000 + i),
			FieldInt64:        int64(99999999999999999 + i),
			FieldFloat:        3 + float32(i)/11,
			FieldDouble:       1 + float64(i)/7,
			FieldString:       "No." + strconv.FormatInt(int64(i), 10),
			FieldBinaryVector: []int{(200 + i) % math.MaxUint8, 0},
			FieldFloatVector:  []float32{float32(i) + 0.1, float32(i) + 0.2, float32(i) + 0.3, float32(i) + 0.4},
		}
		content.Rows = append(content.Rows, row)
	}

	binContent, err := json.Marshal(content)
	assert.Nil(t, err)
	strContent := string(binContent)
	reader := strings.NewReader(strContent)

	consumer := &mockJSONRowConsumer{
		handleErr:   nil,
		rows:        make([]map[int64]interface{}, 0),
		handleCount: 0,
	}

	t.Run("parse success", func(t *testing.T) {
		// set bufRowCount = 4, means call handle() after reading 4 rows
		parser.bufRowCount = 4
		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(len(strContent))}, consumer)
		assert.Nil(t, err)

		// Stop here on a length mismatch: the loop below indexes
		// content.Rows by the consumer's row index and would panic if the
		// consumer somehow received more rows than were generated.
		if !assert.Equal(t, len(content.Rows), len(consumer.rows)) {
			return
		}

		for i := 0; i < len(consumer.rows); i++ {
			contentRow := content.Rows[i]
			parsedRow := consumer.rows[i]

			// Rows are keyed by field ID; numbers arrive as json.Number.
			v1, ok := parsedRow[102].(bool)
			assert.True(t, ok)
			assert.Equal(t, contentRow.FieldBool, v1)

			v2, ok := parsedRow[103].(json.Number)
			assert.True(t, ok)
			assert.Equal(t, strconv.FormatInt(int64(contentRow.FieldInt8), 10), string(v2))

			v3, ok := parsedRow[104].(json.Number)
			assert.True(t, ok)
			assert.Equal(t, strconv.FormatInt(int64(contentRow.FieldInt16), 10), string(v3))

			v4, ok := parsedRow[105].(json.Number)
			assert.True(t, ok)
			assert.Equal(t, strconv.FormatInt(int64(contentRow.FieldInt32), 10), string(v4))

			v5, ok := parsedRow[106].(json.Number)
			assert.True(t, ok)
			assert.Equal(t, strconv.FormatInt(contentRow.FieldInt64, 10), string(v5))

			v6, ok := parsedRow[107].(json.Number)
			assert.True(t, ok)
			f32, err := parseFloat(string(v6), 32, "")
			assert.Nil(t, err)
			assert.InDelta(t, contentRow.FieldFloat, float32(f32), 10e-6)

			v7, ok := parsedRow[108].(json.Number)
			assert.True(t, ok)
			f64, err := parseFloat(string(v7), 64, "")
			assert.Nil(t, err)
			assert.InDelta(t, contentRow.FieldDouble, f64, 10e-14)

			v8, ok := parsedRow[109].(string)
			assert.True(t, ok)
			assert.Equal(t, contentRow.FieldString, v8)

			v9, ok := parsedRow[110].([]interface{})
			assert.True(t, ok)
			assert.Equal(t, len(contentRow.FieldBinaryVector), len(v9))
			for k := 0; k < len(v9); k++ {
				val, ok := v9[k].(json.Number)
				assert.True(t, ok)
				assert.Equal(t, strconv.FormatInt(int64(contentRow.FieldBinaryVector[k]), 10), string(val))
			}

			v10, ok := parsedRow[111].([]interface{})
			assert.True(t, ok)
			assert.Equal(t, len(contentRow.FieldFloatVector), len(v10))
			for k := 0; k < len(v10); k++ {
				val, ok := v10[k].(json.Number)
				assert.True(t, ok)
				fval, err := parseFloat(string(val), 64, "")
				assert.Nil(t, err)
				assert.InDelta(t, contentRow.FieldFloatVector[k], float32(fval), 10e-6)
			}
		}
	})

	t.Run("error cases", func(t *testing.T) {
		// expectParseError parses input with the shared parser and consumer
		// and requires an error; desc is attached to the failure message so
		// a broken case can be identified.
		expectParseError := func(desc, input string, fileSize int64) {
			parseErr := parser.ParseRows(&IOReader{r: strings.NewReader(input), fileSize: fileSize}, consumer)
			assert.NotNil(t, parseErr, desc)
		}

		// handler is nil
		err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(0)}, nil)
		assert.NotNil(t, err)

		expectParseError("not a row-based format", `{
"dummy":[]
}`, 10)

		expectParseError("rows key has no value", `{
"rows":
}`, 5)

		expectParseError("rows list closed with the wrong bracket", `{
"rows": [}
}`, 6)

		expectParseError("rows is an object, not a list", `{
"rows": {}
}`, 8)

		expectParseError("rows is a list of lists, not a list of objects", `{
"rows": [[]]
}`, 10)

		expectParseError("top level is a list, not an object", `[]`, 2)

		expectParseError("empty object", `{}`, 2)

		expectParseError("empty content", ``, 0)

		expectParseError("redundant field", `{
"rows":[
{"dummy": 1, "FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]}
]
}`, 100)

		expectParseError("field missed", `{
"rows":[
{"FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]}
]
}`, 100)

		// Three well-formed rows, reused by the handle() error and
		// cancellation cases below.
		threeRows := `{
"rows":[
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]},
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]},
{"FieldBool": true, "FieldInt8": 10, "FieldInt16": 101, "FieldInt32": 1001, "FieldInt64": 10001, "FieldFloat": 3.14, "FieldDouble": 1.56, "FieldString": "hello world", "FieldBinaryVector": [254, 0], "FieldFloatVector": [1.1, 1.2, 1.3, 1.4]}
]
}`

		// handle() error: the consumer now fails every Handle call. The
		// input is valid, so the error must come from the consumer. Two
		// buffer sizes are tried: one smaller and one larger than the number
		// of rows (presumably hitting the mid-stream and end-of-input Handle
		// calls respectively; confirm against ParseRows).
		consumer.handleErr = errors.New("error")
		parser.bufRowCount = 2
		expectParseError("handle() error, bufRowCount smaller than row count", threeRows, 100)
		parser.bufRowCount = 5
		expectParseError("handle() error, bufRowCount larger than row count", threeRows, 100)

		// Clear the injected error before the remaining cases. While it is
		// set, any Handle call the parser makes fails too, so the assertions
		// below could pass without the condition under test ever being
		// checked.
		consumer.handleErr = nil

		expectParseError("row count is 0", `{
"rows":[]
}`, 100)

		// canceled: valid input, healthy consumer, but the parser's context
		// is already done.
		cancel()
		expectParseError("canceled", threeRows, 100)
	})
}
func Test_JSONParserParseRows_StrPK(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
schema := strKeySchema()
updateProgress := func(percent int64) {
assert.Greater(t, percent, int64(0))
}
parser := NewJSONParser(ctx, schema, updateProgress)
assert.NotNil(t, parser)
// prepare test data
content := &strKeyContent{
Rows: make([]strKeyRow, 0),
}
for i := 0; i < 10; i++ {
row := strKeyRow{
UID: "strID_" + strconv.FormatInt(int64(i), 10),
FieldInt32: int32(10000 + i),
FieldFloat: 1 + float32(i)/13,
FieldString: strconv.FormatInt(int64(i+1), 10) + " this string contains unicode character: 🎵",
FieldBool: i%3 == 0,
FieldFloatVector: []float32{float32(i) / 2, float32(i) / 3, float32(i) / 6, float32(i) / 9},
}
content.Rows = append(content.Rows, row)
}
binContent, err := json.Marshal(content)
assert.Nil(t, err)
strContent := string(binContent)
reader := strings.NewReader(strContent)
consumer := &mockJSONRowConsumer{
handleErr: nil,
rows: make([]map[int64]interface{}, 0),
handleCount: 0,
}
err = parser.ParseRows(&IOReader{r: reader, fileSize: int64(len(binContent))}, consumer)
assert.Nil(t, err)
assert.Equal(t, len(content.Rows), len(consumer.rows))
for i := 0; i < len(consumer.rows); i++ {
contenctRow := content.Rows[i]
parsedRow := consumer.rows[i]
v1, ok := parsedRow[101].(string)
assert.True(t, ok)
assert.Equal(t, contenctRow.UID, v1)
v2, ok := parsedRow[102].(json.Number)
assert.True(t, ok)
assert.Equal(t, strconv.FormatInt(int64(contenctRow.FieldInt32), 10), string(v2))
v3, ok := parsedRow[103].(json.Number)
assert.True(t, ok)
f32, err := parseFloat(string(v3), 32, "")
assert.Nil(t, err)
assert.InDelta(t, contenctRow.FieldFloat, float32(f32), 10e-6)
v4, ok := parsedRow[104].(string)
assert.True(t, ok)
assert.Equal(t, contenctRow.FieldString, v4)
v5, ok := parsedRow[105].(bool)
assert.True(t, ok)
assert.Equal(t, contenctRow.FieldBool, v5)
v6, ok := parsedRow[106].([]interface{})
assert.True(t, ok)
assert.Equal(t, len(contenctRow.FieldFloatVector), len(v6))
for k := 0; k < len(v6); k++ {
val, ok := v6[k].(json.Number)
assert.True(t, ok)
fval, err := parseFloat(string(val), 64, "")
assert.Nil(t, err)
assert.InDelta(t, contenctRow.FieldFloatVector[k], float32(fval), 10e-6)
}
}
}