From 5e1c3922433222607f9d30bdbda8e654c804fb4b Mon Sep 17 00:00:00 2001
From: smellthemoon <64083300+smellthemoon@users.noreply.github.com>
Date: Fri, 11 Oct 2024 21:23:20 +0800
Subject: [PATCH] enhance: support null and default value in restful api (#35825)

#31728

Signed-off-by: lixinguo
Co-authored-by: lixinguo
---
 .../distributed/proxy/httpserver/constant.go |   1 +
 .../proxy/httpserver/handler_v1.go           |   8 +-
 .../proxy/httpserver/handler_v2.go           |  21 +-
 .../proxy/httpserver/handler_v2_test.go      |  93 +++++
 .../proxy/httpserver/request_v2.go           |   2 +
 .../distributed/proxy/httpserver/utils.go    | 215 ++++++++---
 .../proxy/httpserver/utils_test.go           | 343 +++++++++++++++++-
 7 files changed, 617 insertions(+), 66 deletions(-)

diff --git a/internal/distributed/proxy/httpserver/constant.go b/internal/distributed/proxy/httpserver/constant.go
index e67cb9093d..d02c45419d 100644
--- a/internal/distributed/proxy/httpserver/constant.go
+++ b/internal/distributed/proxy/httpserver/constant.go
@@ -77,6 +77,7 @@ const (
 	HTTPIndexField = "fieldName"
 	HTTPAliasName = "aliasName"
 	HTTPRequestData = "data"
+	HTTPRequestDefaultValue = "defaultValue"
 	DefaultDbName = "default"
 	DefaultIndexName = "vector_idx"
 	DefaultAliasName = "the_alias"
diff --git a/internal/distributed/proxy/httpserver/handler_v1.go b/internal/distributed/proxy/httpserver/handler_v1.go
index 9e6003e6d5..248330d4dc 100644
--- a/internal/distributed/proxy/httpserver/handler_v1.go
+++ b/internal/distributed/proxy/httpserver/handler_v1.go
@@ -718,7 +718,7 @@ func (h *HandlersV1) insert(c *gin.Context) {
 			return nil, RestRequestInterceptorErr
 		}
 		body, _ := c.Get(gin.BodyBytesKey)
-		err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema)
+		err, httpReq.Data, _ = checkAndSetData(string(body.([]byte)), collSchema)
 		if err != nil {
 			log.Warn("high level restful api, fail to deal with insert data", zap.Any("body", body), zap.Error(err))
 			HTTPAbortReturn(c, http.StatusOK, gin.H{
@@ -728,7 +728,7 @@ func (h *HandlersV1) insert(c *gin.Context) {
 			return nil, RestRequestInterceptorErr
 		}
 		insertReq := req.(*milvuspb.InsertRequest)
-		insertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema)
+		insertReq.FieldsData, err = anyToColumns(httpReq.Data, nil, collSchema)
 		if err != nil {
 			log.Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err))
 			HTTPAbortReturn(c, http.StatusOK, gin.H{
@@ -817,7 +817,7 @@ func (h *HandlersV1) upsert(c *gin.Context) {
 		}
 	}
 	body, _ := c.Get(gin.BodyBytesKey)
-	err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema)
+	err, httpReq.Data, _ = checkAndSetData(string(body.([]byte)), collSchema)
 	if err != nil {
 		log.Warn("high level restful api, fail to deal with upsert data", zap.Any("body", body), zap.Error(err))
 		HTTPAbortReturn(c, http.StatusOK, gin.H{
@@ -827,7 +827,7 @@ func (h *HandlersV1) upsert(c *gin.Context) {
 		return nil, RestRequestInterceptorErr
 	}
 	upsertReq := req.(*milvuspb.UpsertRequest)
-	upsertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema)
+	upsertReq.FieldsData, err = anyToColumns(httpReq.Data, nil, collSchema)
 	if err != nil {
 		log.Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err))
 		HTTPAbortReturn(c, http.StatusOK, gin.H{
diff --git a/internal/distributed/proxy/httpserver/handler_v2.go b/internal/distributed/proxy/httpserver/handler_v2.go
index e66e3f3953..610c114d05 100644
--- a/internal/distributed/proxy/httpserver/handler_v2.go
+++ b/internal/distributed/proxy/httpserver/handler_v2.go
@@ -730,7 +730,8 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN return nil, err } body, _ := c.Get(gin.BodyBytesKey) - err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) + var validDataMap map[string][]bool + err, httpReq.Data, validDataMap = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with insert data", zap.Error(err), zap.String("body", string(body.([]byte)))) HTTPAbortReturn(c, http.StatusOK, gin.H{ @@ -741,7 +742,7 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN } req.NumRows = uint32(len(httpReq.Data)) - req.FieldsData, err = anyToColumns(httpReq.Data, collSchema) + req.FieldsData, err = anyToColumns(httpReq.Data, validDataMap, collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err)) HTTPAbortReturn(c, http.StatusOK, gin.H{ @@ -808,7 +809,8 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN return nil, err } body, _ := c.Get(gin.BodyBytesKey) - err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema) + var validDataMap map[string][]bool + err, httpReq.Data, validDataMap = checkAndSetData(string(body.([]byte)), collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with upsert data", zap.Any("body", body), zap.Error(err)) HTTPAbortReturn(c, http.StatusOK, gin.H{ @@ -819,7 +821,7 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN } req.NumRows = uint32(len(httpReq.Data)) - req.FieldsData, err = anyToColumns(httpReq.Data, collSchema) + req.FieldsData, err = anyToColumns(httpReq.Data, validDataMap, collSchema) if err != nil { log.Ctx(ctx).Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err)) HTTPAbortReturn(c, http.StatusOK, gin.H{ @@ -1179,6 +1181,17 @@ func (h *HandlersV2) createCollection(ctx context.Context, c *gin.Context, anyRe IsPartitionKey: field.IsPartitionKey, DataType: dataType, TypeParams: []*commonpb.KeyValuePair{}, + Nullable: field.Nullable, + } + + fieldSchema.DefaultValue, err = convertDefaultValue(field.DefaultValue, dataType) + if err != nil { + log.Ctx(ctx).Warn("convert defaultValue fail", zap.Any("defaultValue", field.DefaultValue)) + HTTPAbortReturn(c, http.StatusOK, gin.H{ + HTTPReturnCode: merr.Code(err), + HTTPReturnMessage: "convert defaultValue fail, err:" + err.Error(), + }) + return nil, err } if dataType == schemapb.DataType_Array { if _, ok := schemapb.DataType_value[field.ElementDataType]; !ok { diff --git a/internal/distributed/proxy/httpserver/handler_v2_test.go b/internal/distributed/proxy/httpserver/handler_v2_test.go index 73f7d471d8..feee4f6969 100644 --- a/internal/distributed/proxy/httpserver/handler_v2_test.go +++ b/internal/distributed/proxy/httpserver/handler_v2_test.go @@ -564,6 +564,15 @@ func TestCreateCollection(t *testing.T) { requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { "fields": [ {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "null_fid", "dataType": "Int64", "nullable": true}, + {"fieldName": "default_fid_bool", "dataType": "Bool", "defaultValue": true}, + {"fieldName": "default_fid_int8", "dataType": "Int8", "defaultValue": 10}, + {"fieldName": "default_fid_int16", "dataType": "Int16", "defaultValue": 10}, + 
{"fieldName": "default_fid_int32", "dataType": "Int32", "defaultValue": 10}, + {"fieldName": "default_fid_int64", "dataType": "Int64", "defaultValue": 10}, + {"fieldName": "default_fid_float32", "dataType": "Float", "defaultValue": 10}, + {"fieldName": "default_fid_double", "dataType": "Double", "defaultValue": 10}, + {"fieldName": "default_fid_varchar", "dataType": "VarChar", "defaultValue": "a"}, {"fieldName": "word_count", "dataType": "Array", "elementDataType": "Int64", "elementTypeParams": {"max_capacity": 2}}, {"fieldName": "book_intro", "dataType": "FloatVector", "elementTypeParams": {"dim": 2}} ] @@ -651,6 +660,90 @@ func TestCreateCollection(t *testing.T) { errCode: 65535, }) + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "default_fid", "dataType": "Bool", "defaultValue":10, "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "convert defaultValue fail, err:Wrong defaultValue type: invalid parameter[expected=bool][actual=10]", + errCode: 1100, + }) + + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "default_fid", "dataType": "VarChar", "defaultValue":true, "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "convert defaultValue fail, err:Wrong defaultValue type: invalid parameter[expected=string][actual=true]", + errCode: 1100, + }) + + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "default_fid", "dataType": "Int8", "defaultValue":"10", "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "convert defaultValue fail, err:Wrong defaultValue type: invalid parameter[expected=number][actual=10]", + errCode: 1100, + }) + + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "default_fid", "dataType": "Int64", "defaultValue":"10", "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": 
"SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "convert defaultValue fail, err:Wrong defaultValue type: invalid parameter[expected=number][actual=10]", + errCode: 1100, + }) + + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "default_fid", "dataType": "Float", "defaultValue":"10", "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "convert defaultValue fail, err:Wrong defaultValue type: invalid parameter[expected=number][actual=10]", + errCode: 1100, + }) + + postTestCases = append(postTestCases, requestBodyTestCase{ + path: path, + requestBody: []byte(`{"collectionName": "` + DefaultCollectionName + `", "schema": { + "fields": [ + {"fieldName": "book_id", "dataType": "Int64", "isPrimary": true, "elementTypeParams": {}}, + {"fieldName": "word_count", "dataType": "Int64", "elementTypeParams": {}}, + {"fieldName": "default_fid", "dataType": "Double", "defaultValue":"10", "elementTypeParams": {}}, + {"fieldName": "book_intro", "dataType": "SparseFloatVector", "elementTypeParams": {"dim": 2}} + ] + }, "indexParams": [{"fieldName": "book_intro", "indexName": "book_intro_vector", "metricType": "L2"}]}`), + errMsg: "convert defaultValue fail, err:Wrong defaultValue type: invalid parameter[expected=number][actual=10]", + errCode: 1100, + }) + for _, testcase := range postTestCases { t.Run("post"+testcase.path, func(t *testing.T) { req := httptest.NewRequest(http.MethodPost, testcase.path, bytes.NewReader(testcase.requestBody)) diff --git a/internal/distributed/proxy/httpserver/request_v2.go b/internal/distributed/proxy/httpserver/request_v2.go index b98aece3be..6f37730d8e 100644 --- a/internal/distributed/proxy/httpserver/request_v2.go +++ b/internal/distributed/proxy/httpserver/request_v2.go @@ -320,6 +320,8 @@ type FieldSchema struct { IsPrimary bool `json:"isPrimary"` IsPartitionKey bool `json:"isPartitionKey"` ElementTypeParams map[string]interface{} `json:"elementTypeParams" binding:"required"` + Nullable bool `json:"nullable" binding:"required"` + DefaultValue interface{} `json:"defaultValue" binding:"required"` } type CollectionSchema struct { diff --git a/internal/distributed/proxy/httpserver/utils.go b/internal/distributed/proxy/httpserver/utils.go index af4c665036..b27314c9de 100644 --- a/internal/distributed/proxy/httpserver/utils.go +++ b/internal/distributed/proxy/httpserver/utils.go @@ -218,12 +218,13 @@ func printIndexes(indexes []*milvuspb.IndexDescription) []gin.H { // --------------------- insert param --------------------- // -func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, []map[string]interface{}) { +func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, []map[string]interface{}, map[string][]bool) { var reallyDataArray []map[string]interface{} + validDataMap := make(map[string][]bool) dataResult := gjson.Get(body, HTTPRequestData) dataResultArray := dataResult.Array() if len(dataResultArray) == 
0 { - return merr.ErrMissingRequiredParameters, reallyDataArray + return merr.ErrMissingRequiredParameters, reallyDataArray, validDataMap } var fieldNames []string @@ -238,11 +239,21 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, fieldType := field.DataType fieldName := field.Name + if field.Nullable || field.DefaultValue != nil { + value := gjson.Get(data.Raw, fieldName) + if value.Type == gjson.Null { + validDataMap[fieldName] = append(validDataMap[fieldName], false) + continue + } else { + validDataMap[fieldName] = append(validDataMap[fieldName], true) + } + } + dataString := gjson.Get(data.Raw, fieldName).String() if field.IsPrimaryKey && field.AutoID { if dataString != "" { - return merr.WrapErrParameterInvalid("", "set primary key but autoID == true"), reallyDataArray + return merr.WrapErrParameterInvalid("", "set primary key but autoID == true"), reallyDataArray, validDataMap } continue } @@ -250,84 +261,84 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, switch fieldType { case schemapb.DataType_FloatVector: if dataString == "" { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap } var vectorArray []float32 err := json.Unmarshal([]byte(dataString), &vectorArray) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = vectorArray case schemapb.DataType_BinaryVector: if dataString == "" { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap } vectorStr := gjson.Get(data.Raw, fieldName).Raw var vectorArray []byte err := json.Unmarshal([]byte(vectorStr), &vectorArray) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = vectorArray case schemapb.DataType_SparseFloatVector: if dataString == "" { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap } sparseVec, err := typeutil.CreateSparseFloatRowFromJSON([]byte(dataString)) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = sparseVec case schemapb.DataType_Float16Vector: if dataString == "" { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + return 
merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap } vectorStr := gjson.Get(data.Raw, fieldName).Raw var vectorArray []byte err := json.Unmarshal([]byte(vectorStr), &vectorArray) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = vectorArray case schemapb.DataType_BFloat16Vector: if dataString == "" { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], "", "missing vector field: "+fieldName), reallyDataArray, validDataMap } vectorStr := gjson.Get(data.Raw, fieldName).Raw var vectorArray []byte err := json.Unmarshal([]byte(vectorStr), &vectorArray) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = vectorArray case schemapb.DataType_Bool: result, err := cast.ToBoolE(dataString) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = result case schemapb.DataType_Int8: result, err := cast.ToInt8E(dataString) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = result case schemapb.DataType_Int16: result, err := cast.ToInt16E(dataString) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = result case schemapb.DataType_Int32: result, err := cast.ToInt32E(dataString) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = result case schemapb.DataType_Int64: result, err := json.Number(dataString).Int64() if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = result case schemapb.DataType_Array: @@ -337,7 +348,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, err := json.Unmarshal([]byte(dataString), &arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of 
"+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_BoolData{ @@ -351,7 +362,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, err := json.Unmarshal([]byte(dataString), &arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -365,7 +376,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, err := json.Unmarshal([]byte(dataString), &arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -379,7 +390,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, err := json.Unmarshal([]byte(dataString), &arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_IntData{ @@ -390,18 +401,10 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, } case schemapb.DataType_Int64: arr := make([]int64, 0) - numArr := make([]json.Number, 0) - err := json.Unmarshal([]byte(dataString), &numArr) + err := json.Unmarshal([]byte(dataString), &arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray - } - for _, num := range numArr { - intVal, err := num.Int64() - if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray - } - arr = append(arr, intVal) + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_LongData{ @@ -415,7 +418,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, err := json.Unmarshal([]byte(dataString), &arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_FloatData{ @@ -429,7 +432,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, err := json.Unmarshal([]byte(dataString), 
&arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_DoubleData{ @@ -443,7 +446,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, err := json.Unmarshal([]byte(dataString), &arr) if err != nil { return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)]+ - " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray + " of "+schemapb.DataType_name[int32(field.ElementType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = &schemapb.ScalarField{ Data: &schemapb.ScalarField_StringData{ @@ -458,13 +461,13 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, case schemapb.DataType_Float: result, err := cast.ToFloat32E(dataString) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = result case schemapb.DataType_Double: result, err := cast.ToFloat64E(dataString) if err != nil { - return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray + return merr.WrapErrParameterInvalid(schemapb.DataType_name[int32(fieldType)], dataString, err.Error()), reallyDataArray, validDataMap } reallyData[fieldName] = result case schemapb.DataType_VarChar: @@ -472,7 +475,7 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, case schemapb.DataType_String: reallyData[fieldName] = dataString default: - return merr.WrapErrParameterInvalid("", schemapb.DataType_name[int32(fieldType)], "fieldName: "+fieldName), reallyDataArray + return merr.WrapErrParameterInvalid("", schemapb.DataType_name[int32(fieldType)], "fieldName: "+fieldName), reallyDataArray, validDataMap } } @@ -505,10 +508,10 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error, reallyDataArray = append(reallyDataArray, reallyData) } else { - return merr.WrapErrParameterInvalid(gjson.JSON, data.Type, "NULL:0, FALSE:1, NUMBER:2, STRING:3, TRUE:4, JSON:5"), reallyDataArray + return merr.WrapErrParameterInvalid(gjson.JSON, data.Type, "NULL:0, FALSE:1, NUMBER:2, STRING:3, TRUE:4, JSON:5"), reallyDataArray, validDataMap } } - return nil, reallyDataArray + return nil, reallyDataArray, validDataMap } func containsString(arr []string, s string) bool { @@ -612,7 +615,7 @@ func convertToIntArray(dataType schemapb.DataType, arr interface{}) []int32 { return res } -func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) ([]*schemapb.FieldData, error) { +func anyToColumns(rows []map[string]interface{}, validDataMap map[string][]bool, sch *schemapb.CollectionSchema) ([]*schemapb.FieldData, error) { rowsLen := len(rows) if rowsLen == 0 { return []*schemapb.FieldData{}, fmt.Errorf("no row need to be convert to columns") @@ -703,6 +706,9 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) continue } candi, ok := set[field.Name] + if (field.Nullable || field.DefaultValue != 
nil) && !ok { + continue + } if !ok { return nil, fmt.Errorf("row %d does not has field %s", idx, field.Name) } @@ -950,6 +956,7 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) default: return nil, fmt.Errorf("the type(%v) of field(%v) is not supported, use other sdk please", colData.Type, name) } + colData.ValidData = validDataMap[name] columns = append(columns, colData) } if isDynamic { @@ -1166,26 +1173,62 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap for j := 0; j < columnNum; j++ { switch fieldDataList[j].Type { case schemapb.DataType_Bool: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetBoolData().Data[i] case schemapb.DataType_Int8: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = int8(fieldDataList[j].GetScalars().GetIntData().Data[i]) case schemapb.DataType_Int16: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = int16(fieldDataList[j].GetScalars().GetIntData().Data[i]) case schemapb.DataType_Int32: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetIntData().Data[i] case schemapb.DataType_Int64: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } if enableInt64 { row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetLongData().Data[i] } else { row[fieldDataList[j].FieldName] = strconv.FormatInt(fieldDataList[j].GetScalars().GetLongData().Data[i], 10) } case schemapb.DataType_Float: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetFloatData().Data[i] case schemapb.DataType_Double: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetDoubleData().Data[i] case schemapb.DataType_String: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetStringData().Data[i] case schemapb.DataType_VarChar: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetStringData().Data[i] case schemapb.DataType_BinaryVector: row[fieldDataList[j].FieldName] = fieldDataList[j].GetVectors().GetBinaryVector()[i*(fieldDataList[j].GetVectors().GetDim()/8) : (i+1)*(fieldDataList[j].GetVectors().GetDim()/8)] @@ -1198,8 +1241,16 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap case schemapb.DataType_SparseFloatVector: row[fieldDataList[j].FieldName] = typeutil.SparseFloatBytesToMap(fieldDataList[j].GetVectors().GetSparseFloatVector().Contents[i]) case schemapb.DataType_Array: + if len(fieldDataList[j].ValidData) 
!= 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } row[fieldDataList[j].FieldName] = fieldDataList[j].GetScalars().GetArrayData().Data[i] case schemapb.DataType_JSON: + if len(fieldDataList[j].ValidData) != 0 && !fieldDataList[j].ValidData[i] { + row[fieldDataList[j].FieldName] = nil + continue + } data, ok := fieldDataList[j].GetScalars().Data.(*schemapb.ScalarField_JsonData) if ok && !fieldDataList[j].IsDynamic { row[fieldDataList[j].FieldName] = data.JsonData.Data[i] @@ -1299,3 +1350,85 @@ func convertConsistencyLevel(reqConsistencyLevel string) (commonpb.ConsistencyLe // ConsistencyLevel_Session default in PyMilvus return commonpb.ConsistencyLevel_Session, true, nil } + +func convertDefaultValue(value interface{}, dataType schemapb.DataType) (*schemapb.ValueField, error) { + if value == nil { + return nil, nil + } + switch dataType { + case schemapb.DataType_Bool: + v, ok := value.(bool) + if !ok { + return nil, merr.WrapErrParameterInvalid("bool", value, "Wrong defaultValue type") + } + data := &schemapb.ValueField{ + Data: &schemapb.ValueField_BoolData{ + BoolData: v, + }, + } + return data, nil + + case schemapb.DataType_Int8, schemapb.DataType_Int16, schemapb.DataType_Int32: + // all passed number is float64 type + v, ok := value.(float64) + if !ok { + return nil, merr.WrapErrParameterInvalid("number", value, "Wrong defaultValue type") + } + data := &schemapb.ValueField{ + Data: &schemapb.ValueField_IntData{ + IntData: int32(v), + }, + } + return data, nil + + case schemapb.DataType_Int64: + v, ok := value.(float64) + if !ok { + return nil, merr.WrapErrParameterInvalid("number", value, "Wrong defaultValue type") + } + data := &schemapb.ValueField{ + Data: &schemapb.ValueField_LongData{ + LongData: int64(v), + }, + } + return data, nil + + case schemapb.DataType_Float: + v, ok := value.(float64) + if !ok { + return nil, merr.WrapErrParameterInvalid("number", value, "Wrong defaultValue type") + } + data := &schemapb.ValueField{ + Data: &schemapb.ValueField_FloatData{ + FloatData: float32(v), + }, + } + return data, nil + + case schemapb.DataType_Double: + v, ok := value.(float64) + if !ok { + return nil, merr.WrapErrParameterInvalid("number", value, "Wrong defaultValue type") + } + data := &schemapb.ValueField{ + Data: &schemapb.ValueField_DoubleData{ + DoubleData: v, + }, + } + return data, nil + + case schemapb.DataType_String, schemapb.DataType_VarChar: + v, ok := value.(string) + if !ok { + return nil, merr.WrapErrParameterInvalid("string", value, "Wrong defaultValue type") + } + data := &schemapb.ValueField{ + Data: &schemapb.ValueField_StringData{ + StringData: v, + }, + } + return data, nil + default: + return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("Unexpected default value type: %d", dataType)) + } +} diff --git a/internal/distributed/proxy/httpserver/utils_test.go b/internal/distributed/proxy/httpserver/utils_test.go index 8f677d6ea2..d6070ed9aa 100644 --- a/internal/distributed/proxy/httpserver/utils_test.go +++ b/internal/distributed/proxy/httpserver/utils_test.go @@ -496,12 +496,12 @@ func TestInsertWithDynamicFields(t *testing.T) { req := InsertReq{} coll := generateCollectionSchema(schemapb.DataType_Int64) var err error - err, req.Data = checkAndSetData(body, coll) + err, req.Data, _ = checkAndSetData(body, coll) assert.Equal(t, nil, err) assert.Equal(t, int64(0), req.Data[0]["id"]) assert.Equal(t, int64(1), req.Data[0]["book_id"]) assert.Equal(t, int64(2), req.Data[0]["word_count"]) - fieldsData, err := 
anyToColumns(req.Data, coll) + fieldsData, err := anyToColumns(req.Data, nil, coll) assert.Equal(t, nil, err) assert.Equal(t, true, fieldsData[len(fieldsData)-1].IsDynamic) assert.Equal(t, schemapb.DataType_JSON, fieldsData[len(fieldsData)-1].Type) @@ -521,7 +521,7 @@ func TestInsertWithoutVector(t *testing.T) { float16VectorField.Name = "float16Vector" bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector) bfloat16VectorField.Name = "bfloat16Vector" - err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ Name: DefaultCollectionName, Fields: []*schemapb.FieldSchema{ primaryField, floatVectorField, @@ -530,7 +530,7 @@ func TestInsertWithoutVector(t *testing.T) { }) assert.Error(t, err) assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) - err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ Name: DefaultCollectionName, Fields: []*schemapb.FieldSchema{ primaryField, binaryVectorField, @@ -539,7 +539,7 @@ func TestInsertWithoutVector(t *testing.T) { }) assert.Error(t, err) assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) - err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ Name: DefaultCollectionName, Fields: []*schemapb.FieldSchema{ primaryField, float16VectorField, @@ -548,7 +548,7 @@ func TestInsertWithoutVector(t *testing.T) { }) assert.Error(t, err) assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field")) - err, _ = checkAndSetData(body, &schemapb.CollectionSchema{ + err, _, _ = checkAndSetData(body, &schemapb.CollectionSchema{ Name: DefaultCollectionName, Fields: []*schemapb.FieldSchema{ primaryField, bfloat16VectorField, @@ -568,16 +568,81 @@ func TestInsertWithInt64(t *testing.T) { DataType: schemapb.DataType_Array, ElementType: schemapb.DataType_Int64, }) - err, data := checkAndSetData(body, coll) + err, data, validData := checkAndSetData(body, coll) assert.Equal(t, nil, err) assert.Equal(t, 1, len(data)) + assert.Equal(t, 0, len(validData)) assert.Equal(t, int64(9999999999999999), data[0][FieldBookID]) arr, _ := data[0][arrayFieldName].(*schemapb.ScalarField) assert.Equal(t, int64(9999999999999999), arr.GetLongData().GetData()[0]) +} - body = "{\"data\": {\"book_id\": 9999999999999999, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999.0]}}" - err, _ = checkAndSetData(body, coll) - assert.Error(t, err) +func TestInsertWithNullableField(t *testing.T) { + arrayFieldName := "array-int64" + coll := generateCollectionSchema(schemapb.DataType_Int64) + coll.Fields = append(coll.Fields, &schemapb.FieldSchema{ + Name: arrayFieldName, + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int64, + }) + coll.Fields = append(coll.Fields, &schemapb.FieldSchema{ + Name: "nullable", + DataType: schemapb.DataType_Int64, + Nullable: true, + }) + body := "{\"data\": [{\"book_id\": 9999999999999999, \"\nullable\": null,\"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]},{\"book_id\": 1, \"nullable\": 1,\"book_intro\": [0.3, 0.4], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}]" + err, data, validData := checkAndSetData(body, coll) + assert.Equal(t, nil, err) + assert.Equal(t, 2, len(data)) + assert.Equal(t, 1, len(validData)) + assert.Equal(t, 2, len(validData["nullable"])) 
+ assert.False(t, validData["nullable"][0]) + assert.True(t, validData["nullable"][1]) + assert.Equal(t, int64(9999999999999999), data[0][FieldBookID]) + arr, _ := data[0][arrayFieldName].(*schemapb.ScalarField) + assert.Equal(t, int64(9999999999999999), arr.GetLongData().GetData()[0]) + assert.Equal(t, 4, len(data[0])) + assert.Equal(t, 5, len(data[1])) + + fieldData, err := anyToColumns(data, validData, coll) + assert.Equal(t, nil, err) + assert.Equal(t, len(coll.Fields)+1, len(fieldData)) +} + +func TestInsertWithDefaultValueField(t *testing.T) { + arrayFieldName := "array-int64" + coll := generateCollectionSchema(schemapb.DataType_Int64) + coll.Fields = append(coll.Fields, &schemapb.FieldSchema{ + Name: arrayFieldName, + DataType: schemapb.DataType_Array, + ElementType: schemapb.DataType_Int64, + }) + coll.Fields = append(coll.Fields, &schemapb.FieldSchema{ + Name: "fid", + DataType: schemapb.DataType_Int64, + DefaultValue: &schemapb.ValueField{ + Data: &schemapb.ValueField_LongData{ + LongData: 10, + }, + }, + }) + body := "{\"data\": [{\"book_id\": 9999999999999999, \"\fid\": null,\"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]},{\"book_id\": 1, \"fid\": 1,\"book_intro\": [0.3, 0.4], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}]" + err, data, validData := checkAndSetData(body, coll) + assert.Equal(t, nil, err) + assert.Equal(t, 2, len(data)) + assert.Equal(t, 1, len(validData)) + assert.Equal(t, 2, len(validData["fid"])) + assert.False(t, validData["fid"][0]) + assert.True(t, validData["fid"][1]) + assert.Equal(t, int64(9999999999999999), data[0][FieldBookID]) + arr, _ := data[0][arrayFieldName].(*schemapb.ScalarField) + assert.Equal(t, int64(9999999999999999), arr.GetLongData().GetData()[0]) + assert.Equal(t, 4, len(data[0])) + assert.Equal(t, 5, len(data[1])) + + fieldData, err := anyToColumns(data, validData, coll) + assert.Equal(t, nil, err) + assert.Equal(t, len(coll.Fields)+1, len(fieldData)) } func TestSerialize(t *testing.T) { @@ -1076,6 +1141,247 @@ func newFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.Data } } +func newNullableFieldData(fieldDatas []*schemapb.FieldData, firstFieldType schemapb.DataType) []*schemapb.FieldData { + fieldData1 := schemapb.FieldData{ + Type: schemapb.DataType_Bool, + FieldName: "field-bool", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_BoolData{ + BoolData: &schemapb.BoolArray{ + Data: []bool{true, true, true}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData1) + + fieldData2 := schemapb.FieldData{ + Type: schemapb.DataType_Int8, + FieldName: "field-int8", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{0, 1, 2}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData2) + + fieldData3 := schemapb.FieldData{ + Type: schemapb.DataType_Int16, + FieldName: "field-int16", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{0, 1, 2}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData3) + + fieldData4 := schemapb.FieldData{ + Type: schemapb.DataType_Int32, + FieldName: 
"field-int32", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_IntData{ + IntData: &schemapb.IntArray{ + Data: []int32{0, 1, 2}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData4) + + fieldData5 := schemapb.FieldData{ + Type: schemapb.DataType_Float, + FieldName: "field-float", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_FloatData{ + FloatData: &schemapb.FloatArray{ + Data: []float32{0, 1, 2}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData5) + + fieldData6 := schemapb.FieldData{ + Type: schemapb.DataType_Double, + FieldName: "field-double", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_DoubleData{ + DoubleData: &schemapb.DoubleArray{ + Data: []float64{0, 1, 2}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData6) + + fieldData7 := schemapb.FieldData{ + Type: schemapb.DataType_String, + FieldName: "field-string", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{ + Data: []string{"0", "1", "2"}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData7) + + fieldData8 := schemapb.FieldData{ + Type: schemapb.DataType_VarChar, + FieldName: "field-varchar", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_StringData{ + StringData: &schemapb.StringArray{ + Data: []string{"0", "1", "2"}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData8) + + fieldData9 := schemapb.FieldData{ + Type: schemapb.DataType_JSON, + FieldName: "field-json", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_JsonData{ + JsonData: &schemapb.JSONArray{ + Data: [][]byte{[]byte(`{"XXX": 0}`), []byte(`{"XXX": 0}`), []byte(`{"XXX": 0}`)}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData9) + + fieldData10 := schemapb.FieldData{ + Type: schemapb.DataType_Array, + FieldName: "field-array", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_ArrayData{ + ArrayData: &schemapb.ArrayArray{ + Data: []*schemapb.ScalarField{ + {Data: &schemapb.ScalarField_BoolData{BoolData: &schemapb.BoolArray{Data: []bool{true}}}}, + {Data: &schemapb.ScalarField_BoolData{BoolData: &schemapb.BoolArray{Data: []bool{true}}}}, + {Data: &schemapb.ScalarField_BoolData{BoolData: &schemapb.BoolArray{Data: []bool{true}}}}, + }, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + + fieldData11 := schemapb.FieldData{ + Type: schemapb.DataType_Int64, + FieldName: "field-int64", + Field: &schemapb.FieldData_Scalars{ + Scalars: &schemapb.ScalarField{ + Data: &schemapb.ScalarField_LongData{ + LongData: &schemapb.LongArray{ + Data: []int64{0, 1, 2}, + }, + }, + }, + }, + ValidData: []bool{true, false, true}, + IsDynamic: false, + } + fieldDatas = append(fieldDatas, &fieldData11) + + switch firstFieldType { + case schemapb.DataType_None: + return fieldDatas + 
case schemapb.DataType_Bool: + return []*schemapb.FieldData{&fieldData1} + case schemapb.DataType_Int8: + return []*schemapb.FieldData{&fieldData2} + case schemapb.DataType_Int16: + return []*schemapb.FieldData{&fieldData3} + case schemapb.DataType_Int32: + return []*schemapb.FieldData{&fieldData4} + case schemapb.DataType_Float: + return []*schemapb.FieldData{&fieldData5} + case schemapb.DataType_Double: + return []*schemapb.FieldData{&fieldData6} + case schemapb.DataType_String: + return []*schemapb.FieldData{&fieldData7} + case schemapb.DataType_VarChar: + return []*schemapb.FieldData{&fieldData8} + case schemapb.DataType_BinaryVector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_FloatVector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Float16Vector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_BFloat16Vector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Array: + return []*schemapb.FieldData{&fieldData10} + case schemapb.DataType_JSON: + return []*schemapb.FieldData{&fieldData9} + case schemapb.DataType_SparseFloatVector: + vectorField := generateVectorFieldData(firstFieldType) + return []*schemapb.FieldData{&vectorField} + case schemapb.DataType_Int64: + return []*schemapb.FieldData{&fieldData11} + default: + return []*schemapb.FieldData{ + { + FieldName: "wrong-field-type", + Type: firstFieldType, + }, + } + } +} + func newSearchResult(results []map[string]interface{}) []map[string]interface{} { for i, result := range results { result["field-bool"] = true @@ -1229,19 +1535,21 @@ func newRowsWithArray(results []map[string]interface{}) []map[string]interface{} func TestArray(t *testing.T) { body, _ := generateRequestBody(schemapb.DataType_Int64) collectionSchema := generateCollectionSchema(schemapb.DataType_Int64) - err, rows := checkAndSetData(string(body), collectionSchema) + err, rows, validRows := checkAndSetData(string(body), collectionSchema) assert.Equal(t, nil, err) + assert.Equal(t, 0, len(validRows)) assert.Equal(t, true, compareRows(rows, generateRawRows(schemapb.DataType_Int64), compareRow)) - data, err := anyToColumns(rows, collectionSchema) + data, err := anyToColumns(rows, validRows, collectionSchema) assert.Equal(t, nil, err) assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) body, _ = generateRequestBodyWithArray(schemapb.DataType_Int64) collectionSchema = newCollectionSchemaWithArray(generateCollectionSchema(schemapb.DataType_Int64)) - err, rows = checkAndSetData(string(body), collectionSchema) + err, rows, validRows = checkAndSetData(string(body), collectionSchema) assert.Equal(t, nil, err) + assert.Equal(t, 0, len(validRows)) assert.Equal(t, true, compareRows(rows, newRowsWithArray(generateRawRows(schemapb.DataType_Int64)), compareRow)) - data, err = anyToColumns(rows, collectionSchema) + data, err = anyToColumns(rows, validRows, collectionSchema) assert.Equal(t, nil, err) assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) } @@ -1297,7 +1605,7 @@ func TestVector(t *testing.T) { }, EnableDynamicField: true, } - err, rows := checkAndSetData(string(body), collectionSchema) + err, rows, validRows := checkAndSetData(string(body), collectionSchema) assert.Equal(t, nil, err) for _, row := range rows { assert.Equal(t, 1, 
len(row[binaryVector].([]byte))) @@ -1306,7 +1614,8 @@ func TestVector(t *testing.T) { // all test sparse rows have 2 elements, each should be of 8 bytes assert.Equal(t, 16, len(row[sparseFloatVector].([]byte))) } - data, err := anyToColumns(rows, collectionSchema) + assert.Equal(t, 0, len(validRows)) + data, err := anyToColumns(rows, validRows, collectionSchema) assert.Equal(t, nil, err) assert.Equal(t, len(collectionSchema.Fields)+1, len(data)) @@ -1317,7 +1626,7 @@ func TestVector(t *testing.T) { } row[field] = value body, _ = wrapRequestBody([]map[string]interface{}{row}) - err, _ = checkAndSetData(string(body), collectionSchema) + err, _, _ = checkAndSetData(string(body), collectionSchema) assert.Error(t, err) }
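
For illustration only, not part of the patch: a minimal sketch of how the new pieces compose, written as if it lived in the same httpserver test package so it can use the helpers from utils_test.go (generateCollectionSchema). The field name "score" and the wrapper functions exampleNullableInsert/exampleDefaultValue are hypothetical. checkAndSetData now returns a per-field validity map alongside the parsed rows, anyToColumns copies that map into each column's ValidData, and convertDefaultValue maps a JSON-decoded defaultValue (numbers arrive as float64) onto a schemapb.ValueField.

// Sketch under the assumptions above; names marked hypothetical are not in this patch.
func exampleNullableInsert() ([]*schemapb.FieldData, error) {
	coll := generateCollectionSchema(schemapb.DataType_Int64)
	coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
		Name:     "score", // hypothetical nullable field
		DataType: schemapb.DataType_Int64,
		Nullable: true,
	})

	body := `{"data": [
		{"book_id": 1, "word_count": 2, "book_intro": [0.1, 0.2], "score": null},
		{"book_id": 2, "word_count": 3, "book_intro": [0.3, 0.4], "score": 7}
	]}`

	// Rows that pass JSON null (or omit the field) for a nullable/default field
	// get false in the validity map; here validDataMap["score"] == []bool{false, true}.
	err, rows, validDataMap := checkAndSetData(body, coll)
	if err != nil {
		return nil, err
	}

	// anyToColumns attaches the validity map to the column's ValidData slice.
	return anyToColumns(rows, validDataMap, coll)
}

// Illustrative: an Int64 field whose REST defaultValue was the JSON number 10.
// JSON numbers decode as float64, which convertDefaultValue narrows to the field type.
func exampleDefaultValue() (*schemapb.ValueField, error) {
	return convertDefaultValue(float64(10), schemapb.DataType_Int64) // yields ValueField_LongData{LongData: 10}
}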