enhance: support upsert autoid==true in Restful API and fix some bugs(#37072)(#37487) (#37766)

pr: #37072
pr: #37487

---------

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
This commit is contained in:
smellthemoon 2024-11-18 19:44:31 +08:00 committed by GitHub
parent 2a4f54cd4f
commit 46692d7525
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 342 additions and 145 deletions

View File

@ -728,7 +728,7 @@ func (h *HandlersV1) insert(c *gin.Context) {
return nil, RestRequestInterceptorErr
}
insertReq := req.(*milvuspb.InsertRequest)
insertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema)
insertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema, true)
if err != nil {
log.Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err))
HTTPAbortReturn(c, http.StatusOK, gin.H{
@ -827,7 +827,7 @@ func (h *HandlersV1) upsert(c *gin.Context) {
return nil, RestRequestInterceptorErr
}
upsertReq := req.(*milvuspb.UpsertRequest)
upsertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema)
upsertReq.FieldsData, err = anyToColumns(httpReq.Data, collSchema, false)
if err != nil {
log.Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err))
HTTPAbortReturn(c, http.StatusOK, gin.H{

View File

@ -57,7 +57,7 @@ var DefaultShowCollectionsResp = milvuspb.ShowCollectionsResponse{
var DefaultDescCollectionResp = milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false, true),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}
@ -767,9 +767,9 @@ func TestInsertForDataType(t *testing.T) {
paramtable.Init()
paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true")
schemas := map[string]*schemapb.CollectionSchema{
"[success]kinds of data type": newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64)),
"[success]with dynamic field": withDynamicField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64))),
"[success]with array fields": withArrayField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64))),
"[success]kinds of data type": newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false, true)),
"[success]with dynamic field": withDynamicField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false, true))),
"[success]with array fields": withArrayField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false, true))),
}
for name, schema := range schemas {
t.Run(name, func(t *testing.T) {
@ -840,7 +840,7 @@ func TestReturnInt64(t *testing.T) {
}
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -871,7 +871,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -902,7 +902,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: false, Accept-Type-Allow-Int64: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -934,7 +934,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: false, Accept-Type-Allow-Int64: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -967,7 +967,7 @@ func TestReturnInt64(t *testing.T) {
paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true")
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -998,7 +998,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -1029,7 +1029,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: true, Accept-Type-Allow-Int64: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -1061,7 +1061,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: true, Accept-Type-Allow-Int64: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false, true))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,

View File

@ -746,9 +746,11 @@ func (h *HandlersV2) delete(ctx context.Context, c *gin.Context, anyReq any, dbN
return h.proxy.Delete(reqCtx, req.(*milvuspb.DeleteRequest))
})
if err == nil {
HTTPReturn(c, http.StatusOK, wrapperReturnDefaultWithCost(
proxy.GetCostValue(resp.(*milvuspb.MutationResult).GetStatus()),
))
deleteResp := resp.(*milvuspb.MutationResult)
HTTPReturn(c, http.StatusOK, gin.H{
HTTPReturnCode: merr.Code(nil),
HTTPReturnData: gin.H{"deleteCount": deleteResp.DeleteCnt},
})
}
return resp, err
}
@ -779,7 +781,7 @@ func (h *HandlersV2) insert(ctx context.Context, c *gin.Context, anyReq any, dbN
}
req.NumRows = uint32(len(httpReq.Data))
req.FieldsData, err = anyToColumns(httpReq.Data, collSchema)
req.FieldsData, err = anyToColumns(httpReq.Data, collSchema, true)
if err != nil {
log.Ctx(ctx).Warn("high level restful api, fail to deal with insert data", zap.Any("data", httpReq.Data), zap.Error(err))
HTTPAbortReturn(c, http.StatusOK, gin.H{
@ -840,11 +842,6 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN
if err != nil {
return nil, err
}
if collSchema.AutoID {
err := merr.WrapErrParameterInvalid("autoID: false", "autoID: true", "cannot upsert an autoID collection")
HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()})
return nil, err
}
body, _ := c.Get(gin.BodyBytesKey)
err, httpReq.Data = checkAndSetData(string(body.([]byte)), collSchema)
if err != nil {
@ -857,7 +854,7 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN
}
req.NumRows = uint32(len(httpReq.Data))
req.FieldsData, err = anyToColumns(httpReq.Data, collSchema)
req.FieldsData, err = anyToColumns(httpReq.Data, collSchema, false)
if err != nil {
log.Ctx(ctx).Warn("high level restful api, fail to deal with upsert data", zap.Any("data", httpReq.Data), zap.Error(err))
HTTPAbortReturn(c, http.StatusOK, gin.H{

View File

@ -796,7 +796,7 @@ func TestMethodGet(t *testing.T) {
mp.EXPECT().HasCollection(mock.Anything, mock.Anything).Return(&milvuspb.BoolResponse{Status: commonErrorStatus}, nil).Once()
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false, true),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Twice()
@ -1253,7 +1253,7 @@ func TestDML(t *testing.T) {
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false, true),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Times(6)
@ -1273,6 +1273,13 @@ func TestDML(t *testing.T) {
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once()
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_StrId{StrId: &schemapb.StringArray{Data: []string{}}}}}, nil).Once()
mp.EXPECT().Delete(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus}, nil).Once()
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64, true, true),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Once()
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once()
testEngine := initHTTPServerV2(mp, false)
queryTestCases := []requestBodyTestCase{}
queryTestCases = append(queryTestCases, requestBodyTestCase{
@ -1347,6 +1354,11 @@ func TestDML(t *testing.T) {
errMsg: "",
errCode: 65535,
})
// upsert when autoid==true
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: UpsertAction,
requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`),
})
for _, testcase := range queryTestCases {
t.Run(testcase.path, func(t *testing.T) {
@ -1385,7 +1397,7 @@ func TestAllowInt64(t *testing.T) {
})
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false, true),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Twice()
@ -1421,7 +1433,7 @@ func TestSearchV2(t *testing.T) {
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false, true),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Times(11)
@ -1445,7 +1457,7 @@ func TestSearchV2(t *testing.T) {
Scores: DefaultScores,
}}, nil).Once()
mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3)
collSchema := generateCollectionSchema(schemapb.DataType_Int64)
collSchema := generateCollectionSchema(schemapb.DataType_Int64, false, true)
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)
binaryVectorField.Name = "binaryVector"
float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector)

View File

@ -180,7 +180,7 @@ type SubSearchReq struct {
MetricType string `json:"metricType"`
Limit int32 `json:"limit"`
Offset int32 `json:"offset"`
SearchParams searchParams `json:"searchParams"`
SearchParams searchParams `json:"params"`
}
type HybridSearchReq struct {

View File

@ -157,6 +157,9 @@ func printFieldsV2(fields []*schemapb.FieldSchema) []gin.H {
func printFieldDetails(fields []*schemapb.FieldSchema, oldVersion bool) []gin.H {
var res []gin.H
for _, field := range fields {
if field.Name == common.MetaFieldName {
continue
}
fieldDetail := gin.H{
HTTPReturnFieldName: field.Name,
HTTPReturnFieldPrimaryKey: field.IsPrimaryKey,
@ -229,6 +232,9 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error,
var fieldNames []string
for _, field := range collSchema.Fields {
if field.IsDynamic {
continue
}
fieldNames = append(fieldNames, field.Name)
}
@ -236,15 +242,16 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error,
reallyData := map[string]interface{}{}
if data.Type == gjson.JSON {
for _, field := range collSchema.Fields {
if field.IsDynamic {
continue
}
fieldType := field.DataType
fieldName := field.Name
dataString := gjson.Get(data.Raw, fieldName).String()
if field.IsPrimaryKey && field.AutoID {
if dataString != "" {
return merr.WrapErrParameterInvalid("", "set primary key but autoID == true"), reallyDataArray
}
// if the pk was not passed for an autoID collection, skip it; if it was passed, just try to set it
if field.IsPrimaryKey && field.AutoID && len(dataString) == 0 {
continue
}
@ -478,9 +485,12 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error,
}
// fill dynamic schema
if collSchema.EnableDynamicField {
for mapKey, mapValue := range data.Map() {
if !containsString(fieldNames, mapKey) {
for mapKey, mapValue := range data.Map() {
if !containsString(fieldNames, mapKey) {
if collSchema.EnableDynamicField {
if mapKey == common.MetaFieldName {
return merr.WrapErrParameterInvalidMsg(fmt.Sprintf("use the invalid field name(%s) when enable dynamicField", mapKey)), nil
}
mapValueStr := mapValue.String()
switch mapValue.Type {
case gjson.True, gjson.False:
@ -500,6 +510,8 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error,
default:
log.Warn("unknown json type found", zap.Int("mapValue.Type", int(mapValue.Type)))
}
} else {
return merr.WrapErrParameterInvalidMsg("has pass more field without dynamic schema, please check it"), nil
}
}
}
@ -613,7 +625,7 @@ func convertToIntArray(dataType schemapb.DataType, arr interface{}) []int32 {
return res
}
func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema) ([]*schemapb.FieldData, error) {
func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema, inInsert bool) ([]*schemapb.FieldData, error) {
rowsLen := len(rows)
if rowsLen == 0 {
return []*schemapb.FieldData{}, fmt.Errorf("no row need to be convert to columns")
@ -625,8 +637,7 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema)
nameDims := make(map[string]int64)
fieldData := make(map[string]*schemapb.FieldData)
for _, field := range sch.Fields {
// skip auto id pk field
if (field.IsPrimaryKey && field.AutoID) || field.IsDynamic {
if (field.IsPrimaryKey && field.AutoID && inInsert) || field.IsDynamic {
continue
}
var data interface{}
@ -697,13 +708,16 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema)
return nil, err
}
for idx, field := range sch.Fields {
// skip auto id pk field
if (field.IsPrimaryKey && field.AutoID) || field.IsDynamic {
// remove pk field from candidates set, avoid adding it into dynamic column
delete(set, field.Name)
if field.IsDynamic {
continue
}
candi, ok := set[field.Name]
if field.IsPrimaryKey && field.AutoID && inInsert {
if ok {
return nil, merr.WrapErrParameterInvalidMsg(fmt.Sprintf("no need to pass pk field(%s) when autoid==true in insert", field.Name))
}
continue
}
if !ok {
return nil, fmt.Errorf("row %d does not has field %s", idx, field.Name)
}
@ -751,7 +765,7 @@ func anyToColumns(rows []map[string]interface{}, sch *schemapb.CollectionSchema)
delete(set, field.Name)
}
// if the schema is not dynamic but extra fields were passed, the error has already been returned by checkAndSetData in /internal/distributed/proxy/httpserver/utils.go
if isDynamic {
m := make(map[string]interface{})
for name, candi := range set {
@ -1203,7 +1217,7 @@ func buildQueryResp(rowsNum int64, needFields []string, fieldDataList []*schemap
case schemapb.DataType_JSON:
data, ok := fieldDataList[j].GetScalars().Data.(*schemapb.ScalarField_JsonData)
if ok && !fieldDataList[j].IsDynamic {
row[fieldDataList[j].FieldName] = data.JsonData.Data[i]
row[fieldDataList[j].FieldName] = string(data.JsonData.Data[i])
} else {
var dataMap map[string]interface{}

View File

@ -27,14 +27,14 @@ const (
var DefaultScores = []float32{0.01, 0.04, 0.09}
func generatePrimaryField(datatype schemapb.DataType) *schemapb.FieldSchema {
func generatePrimaryField(datatype schemapb.DataType, autoID bool) *schemapb.FieldSchema {
return &schemapb.FieldSchema{
FieldID: common.StartOfUserFieldID,
Name: FieldBookID,
IsPrimaryKey: true,
Description: "",
DataType: datatype,
AutoID: false,
AutoID: autoID,
}
}
@ -88,25 +88,37 @@ func generateVectorFieldSchema(dataType schemapb.DataType) *schemapb.FieldSchema
}
}
func generateCollectionSchema(primaryDataType schemapb.DataType) *schemapb.CollectionSchema {
primaryField := generatePrimaryField(primaryDataType)
func generateCollectionSchema(primaryDataType schemapb.DataType, autoID bool, isDynamic bool) *schemapb.CollectionSchema {
primaryField := generatePrimaryField(primaryDataType, autoID)
vectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
vectorField.Name = FieldBookIntro
fields := []*schemapb.FieldSchema{
primaryField, {
FieldID: common.StartOfUserFieldID + 1,
Name: FieldWordCount,
IsPrimaryKey: false,
Description: "",
DataType: 5,
AutoID: false,
}, vectorField,
}
if isDynamic {
fields = append(fields, &schemapb.FieldSchema{
FieldID: common.StartOfUserFieldID + 2,
Name: "$meta",
IsPrimaryKey: false,
Description: "",
DataType: 23,
AutoID: false,
IsDynamic: true,
})
}
return &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Description: "",
AutoID: false,
Fields: []*schemapb.FieldSchema{
primaryField, {
FieldID: common.StartOfUserFieldID + 1,
Name: FieldWordCount,
IsPrimaryKey: false,
Description: "",
DataType: 5,
AutoID: false,
}, vectorField,
},
EnableDynamicField: true,
Name: DefaultCollectionName,
Description: "",
AutoID: autoID,
Fields: fields,
EnableDynamicField: isDynamic,
}
}
@ -339,7 +351,7 @@ func generateQueryResult64(withDistance bool) []map[string]interface{} {
}
func TestPrintCollectionDetails(t *testing.T) {
coll := generateCollectionSchema(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false, true)
indexes := generateIndexes()
assert.Equal(t, []gin.H{
{
@ -471,8 +483,8 @@ func TestPrintCollectionDetails(t *testing.T) {
}
func TestPrimaryField(t *testing.T) {
coll := generateCollectionSchema(schemapb.DataType_Int64)
primaryField := generatePrimaryField(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false, true)
primaryField := generatePrimaryField(schemapb.DataType_Int64, false)
field, ok := getPrimaryField(coll)
assert.Equal(t, true, ok)
assert.EqualExportedValues(t, primaryField, field)
@ -489,90 +501,256 @@ func TestPrimaryField(t *testing.T) {
assert.Equal(t, nil, err)
assert.Equal(t, "book_id in [1,2,3]", filter)
primaryField = generatePrimaryField(schemapb.DataType_VarChar)
primaryField = generatePrimaryField(schemapb.DataType_VarChar, false)
jsonStr = "{\"id\": [\"1\", \"2\", \"3\"]}"
idStr = gjson.Get(jsonStr, "id")
rangeStr, err = convertRange(primaryField, idStr)
assert.Equal(t, nil, err)
assert.Equal(t, `"1","2","3"`, rangeStr)
coll2 := generateCollectionSchema(schemapb.DataType_VarChar)
coll2 := generateCollectionSchema(schemapb.DataType_VarChar, false, true)
filter, err = checkGetPrimaryKey(coll2, idStr)
assert.Equal(t, nil, err)
assert.Equal(t, `book_id in ["1","2","3"]`, filter)
}
func TestInsertWithDynamicFields(t *testing.T) {
body := "{\"data\": {\"id\": 0, \"book_id\": 1, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"classified\": false, \"databaseID\": null}}"
req := InsertReq{}
coll := generateCollectionSchema(schemapb.DataType_Int64)
var err error
err, req.Data = checkAndSetData(body, coll)
assert.Equal(t, nil, err)
assert.Equal(t, int64(0), req.Data[0]["id"])
assert.Equal(t, int64(1), req.Data[0]["book_id"])
assert.Equal(t, int64(2), req.Data[0]["word_count"])
fieldsData, err := anyToColumns(req.Data, coll)
assert.Equal(t, nil, err)
assert.Equal(t, true, fieldsData[len(fieldsData)-1].IsDynamic)
assert.Equal(t, schemapb.DataType_JSON, fieldsData[len(fieldsData)-1].Type)
assert.Equal(t, "{\"classified\":false,\"id\":0}", string(fieldsData[len(fieldsData)-1].GetScalars().GetJsonData().GetData()[0]))
// TestAnyToColumns exercises checkAndSetData followed by anyToColumns for the
// insert (inInsert == true) and upsert (inInsert == false) paths, across the
// combinations of primary-key autoID and dynamic-field settings produced by
// generateCollectionSchema(primaryType, autoID, isDynamic).
//
// Behaviors pinned by the subtests:
//   - with a dynamic schema, unknown keys are packed into a trailing dynamic
//     JSON column, and null values are dropped;
//   - on insert with autoID == true, passing the primary key is rejected by
//     anyToColumns, and omitting it yields one column fewer;
//   - on upsert, the primary key column is produced regardless of autoID;
//   - without a dynamic schema, extra keys are rejected by checkAndSetData.
func TestAnyToColumns(t *testing.T) {
	const (
		// Row carrying every schema field plus extra keys ("id", "classified", "databaseID").
		bodyWithExtras = "{\"data\": {\"id\": 0, \"book_id\": 1, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"classified\": false, \"databaseID\": null}}"
		// Row carrying exactly the schema fields, primary key included.
		bodyWithPK = "{\"data\": {\"book_id\": 1, \"book_intro\": [0.1, 0.2], \"word_count\": 2}}"
		// Row carrying the schema fields without the primary key.
		bodyWithoutPK = "{\"data\": { \"book_intro\": [0.1, 0.2], \"word_count\": 2}}"
		// Expected content of the dynamic column built from bodyWithExtras.
		wantDynamicJSON = "{\"classified\":false,\"id\":0}"
	)

	t.Run("insert with dynamic field", func(t *testing.T) {
		req := InsertReq{}
		coll := generateCollectionSchema(schemapb.DataType_Int64, false, true)
		var err error
		err, req.Data = checkAndSetData(bodyWithExtras, coll)
		assert.Equal(t, nil, err)
		assert.Equal(t, int64(0), req.Data[0]["id"])
		assert.Equal(t, int64(1), req.Data[0]["book_id"])
		assert.Equal(t, int64(2), req.Data[0]["word_count"])

		fieldsData, err := anyToColumns(req.Data, coll, true)
		assert.Equal(t, nil, err)
		last := fieldsData[len(fieldsData)-1]
		assert.Equal(t, true, last.IsDynamic)
		assert.Equal(t, schemapb.DataType_JSON, last.Type)
		assert.Equal(t, wantDynamicJSON, string(last.GetScalars().GetJsonData().GetData()[0]))
	})

	t.Run("upsert with dynamic field", func(t *testing.T) {
		req := InsertReq{}
		coll := generateCollectionSchema(schemapb.DataType_Int64, false, true)
		var err error
		err, req.Data = checkAndSetData(bodyWithExtras, coll)
		assert.Equal(t, nil, err)
		assert.Equal(t, int64(0), req.Data[0]["id"])
		assert.Equal(t, int64(1), req.Data[0]["book_id"])
		assert.Equal(t, int64(2), req.Data[0]["word_count"])

		fieldsData, err := anyToColumns(req.Data, coll, false)
		assert.Equal(t, nil, err)
		last := fieldsData[len(fieldsData)-1]
		assert.Equal(t, true, last.IsDynamic)
		assert.Equal(t, schemapb.DataType_JSON, last.Type)
		assert.Equal(t, wantDynamicJSON, string(last.GetScalars().GetJsonData().GetData()[0]))
	})

	t.Run("insert with dynamic field, but pass pk when autoid==true", func(t *testing.T) {
		req := InsertReq{}
		coll := generateCollectionSchema(schemapb.DataType_Int64, true, true)
		var err error
		// checkAndSetData tolerates the pk here; the insert-only rejection happens in anyToColumns.
		err, req.Data = checkAndSetData(bodyWithExtras, coll)
		assert.Equal(t, nil, err)
		assert.Equal(t, int64(0), req.Data[0]["id"])
		assert.Equal(t, int64(1), req.Data[0]["book_id"])
		assert.Equal(t, int64(2), req.Data[0]["word_count"])

		_, err = anyToColumns(req.Data, coll, true)
		assert.Error(t, err)
		assert.Equal(t, true, strings.HasPrefix(err.Error(), "no need to pass pk field"))
	})

	t.Run("pass more field", func(t *testing.T) {
		coll := generateCollectionSchema(schemapb.DataType_Int64, true, false)
		var err error
		err, _ = checkAndSetData(bodyWithExtras, coll)
		assert.Error(t, err)
		assert.Equal(t, true, strings.HasPrefix(err.Error(), "has pass more fiel"))
	})

	t.Run("insert with autoid==false", func(t *testing.T) {
		req := InsertReq{}
		coll := generateCollectionSchema(schemapb.DataType_Int64, false, false)
		var err error
		err, req.Data = checkAndSetData(bodyWithPK, coll)
		assert.Equal(t, nil, err)
		assert.Equal(t, int64(1), req.Data[0]["book_id"])
		assert.Equal(t, []float32{0.1, 0.2}, req.Data[0]["book_intro"])
		assert.Equal(t, int64(2), req.Data[0]["word_count"])

		fieldsData, err := anyToColumns(req.Data, coll, true)
		assert.Equal(t, nil, err)
		assert.Equal(t, 3, len(fieldsData))
		assert.Equal(t, false, fieldsData[len(fieldsData)-1].IsDynamic)
	})

	t.Run("insert with autoid==false but has no pk", func(t *testing.T) {
		coll := generateCollectionSchema(schemapb.DataType_Int64, false, false)
		var err error
		err, _ = checkAndSetData(bodyWithoutPK, coll)
		assert.Error(t, err)
		assert.Equal(t, true, strings.HasPrefix(err.Error(), "strconv.ParseInt: parsing \"\": invalid syntax"))
	})

	t.Run("insert with autoid==true", func(t *testing.T) {
		req := InsertReq{}
		coll := generateCollectionSchema(schemapb.DataType_Int64, true, false)
		var err error
		err, req.Data = checkAndSetData(bodyWithoutPK, coll)
		assert.Equal(t, nil, err)
		assert.Equal(t, []float32{0.1, 0.2}, req.Data[0]["book_intro"])
		assert.Equal(t, int64(2), req.Data[0]["word_count"])

		// The autoID pk column is omitted on insert, leaving two columns.
		fieldsData, err := anyToColumns(req.Data, coll, true)
		assert.Equal(t, nil, err)
		assert.Equal(t, 2, len(fieldsData))
		assert.Equal(t, false, fieldsData[len(fieldsData)-1].IsDynamic)
	})

	t.Run("upsert with autoid==true", func(t *testing.T) {
		req := InsertReq{}
		coll := generateCollectionSchema(schemapb.DataType_Int64, true, false)
		var err error
		err, req.Data = checkAndSetData(bodyWithPK, coll)
		assert.Equal(t, nil, err)
		assert.Equal(t, int64(1), req.Data[0]["book_id"])
		assert.Equal(t, []float32{0.1, 0.2}, req.Data[0]["book_intro"])
		assert.Equal(t, int64(2), req.Data[0]["word_count"])

		// On upsert the pk column is kept even though autoID is enabled.
		fieldsData, err := anyToColumns(req.Data, coll, false)
		assert.Equal(t, nil, err)
		assert.Equal(t, 3, len(fieldsData))
		assert.Equal(t, false, fieldsData[len(fieldsData)-1].IsDynamic)
	})

	t.Run("upsert with autoid==false", func(t *testing.T) {
		req := InsertReq{}
		// autoID must be false here so this case differs from "upsert with autoid==true".
		coll := generateCollectionSchema(schemapb.DataType_Int64, false, false)
		var err error
		err, req.Data = checkAndSetData(bodyWithPK, coll)
		assert.Equal(t, nil, err)
		assert.Equal(t, int64(1), req.Data[0]["book_id"])
		assert.Equal(t, []float32{0.1, 0.2}, req.Data[0]["book_intro"])
		assert.Equal(t, int64(2), req.Data[0]["word_count"])

		fieldsData, err := anyToColumns(req.Data, coll, false)
		assert.Equal(t, nil, err)
		assert.Equal(t, 3, len(fieldsData))
		assert.Equal(t, false, fieldsData[len(fieldsData)-1].IsDynamic)
	})
}
func TestInsertWithoutVector(t *testing.T) {
body := "{\"data\": {}}"
var err error
primaryField := generatePrimaryField(schemapb.DataType_Int64)
primaryField.AutoID = true
floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
floatVectorField.Name = "floatVector"
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)
binaryVectorField.Name = "binaryVector"
float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector)
float16VectorField.Name = "float16Vector"
bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector)
bfloat16VectorField.Name = "bfloat16Vector"
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, floatVectorField,
},
EnableDynamicField: true,
func TestCheckAndSetData(t *testing.T) {
t.Run("invalid field name with dynamic field", func(t *testing.T) {
body := "{\"data\": {\"id\": 0,\"$meta\": 2,\"book_id\": 1, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"classified\": false, \"databaseID\": null}}"
coll := generateCollectionSchema(schemapb.DataType_Int64, false, true)
var err error
err, _ = checkAndSetData(body, coll)
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "use the invalid field name"))
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, binaryVectorField,
},
EnableDynamicField: true,
t.Run("without vector", func(t *testing.T) {
body := "{\"data\": {}}"
var err error
primaryField := generatePrimaryField(schemapb.DataType_Int64, true)
floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
floatVectorField.Name = "floatVector"
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)
binaryVectorField.Name = "binaryVector"
float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector)
float16VectorField.Name = "float16Vector"
bfloat16VectorField := generateVectorFieldSchema(schemapb.DataType_BFloat16Vector)
bfloat16VectorField.Name = "bfloat16Vector"
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, floatVectorField,
},
EnableDynamicField: true,
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, binaryVectorField,
},
EnableDynamicField: true,
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, float16VectorField,
},
EnableDynamicField: true,
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, bfloat16VectorField,
},
EnableDynamicField: true,
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, float16VectorField,
},
EnableDynamicField: true,
t.Run("with pk when autoID == True when upsert", func(t *testing.T) {
arrayFieldName := "array-int64"
body := "{\"data\": {\"book_id\": 9999999999999999, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}}"
coll := generateCollectionSchema(schemapb.DataType_Int64, true, false)
coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
Name: arrayFieldName,
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int64,
})
err, data := checkAndSetData(body, coll)
assert.Equal(t, nil, err)
assert.Equal(t, 1, len(data))
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
err, _ = checkAndSetData(body, &schemapb.CollectionSchema{
Name: DefaultCollectionName,
Fields: []*schemapb.FieldSchema{
primaryField, bfloat16VectorField,
},
EnableDynamicField: true,
t.Run("without pk when autoID == True when insert", func(t *testing.T) {
arrayFieldName := "array-int64"
body := "{\"data\": {\"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}}"
coll := generateCollectionSchema(schemapb.DataType_Int64, true, false)
coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
Name: arrayFieldName,
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int64,
})
err, data := checkAndSetData(body, coll)
assert.Equal(t, nil, err)
assert.Equal(t, 1, len(data))
})
t.Run("with pk when autoID == false", func(t *testing.T) {
arrayFieldName := "array-int64"
body := "{\"data\": {\"book_id\": 9999999999999999, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}}"
coll := generateCollectionSchema(schemapb.DataType_Int64, false, false)
coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
Name: arrayFieldName,
DataType: schemapb.DataType_Array,
ElementType: schemapb.DataType_Int64,
})
err, data := checkAndSetData(body, coll)
assert.Equal(t, nil, err)
assert.Equal(t, 1, len(data))
})
assert.Error(t, err)
assert.Equal(t, true, strings.HasPrefix(err.Error(), "missing vector field"))
}
func TestInsertWithInt64(t *testing.T) {
arrayFieldName := "array-int64"
body := "{\"data\": {\"book_id\": 9999999999999999, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}}"
coll := generateCollectionSchema(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false, true)
coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
Name: arrayFieldName,
DataType: schemapb.DataType_Array,
@ -674,16 +852,11 @@ func compareRow(m1 map[string]interface{}, m2 map[string]interface{}) bool {
}
}
} else if key == "field-json" {
arr1 := value.([]byte)
arr1 := value.(string)
arr2 := m2[key].([]byte)
if len(arr1) != len(arr2) {
if arr1 != string(arr2) {
return false
}
for j, element := range arr1 {
if element != arr2[j] {
return false
}
}
} else if strings.HasPrefix(key, "array-") {
continue
} else if value != m2[key] {
@ -1238,22 +1411,22 @@ func newRowsWithArray(results []map[string]interface{}) []map[string]interface{}
// TestArray runs a generated request body through checkAndSetData and
// anyToColumns, first against the plain generated schema and then against the
// same schema extended by newCollectionSchemaWithArray.
//
// In both passes the parsed rows must match the expected raw rows, and the
// column conversion must return exactly one column per schema field.
func TestArray(t *testing.T) {
	// Pass 1: schema without the extra array fields.
	body, _ := generateRequestBody(schemapb.DataType_Int64)
	collectionSchema := generateCollectionSchema(schemapb.DataType_Int64, false, true)
	err, rows := checkAndSetData(string(body), collectionSchema)
	assert.Equal(t, nil, err)
	assert.Equal(t, true, compareRows(rows, generateRawRows(schemapb.DataType_Int64), compareRow))
	// The trailing `true` mirrors the insert handler's call to anyToColumns;
	// NOTE(review): its exact meaning is defined by anyToColumns -- confirm there.
	data, err := anyToColumns(rows, collectionSchema, true)
	assert.Equal(t, nil, err)
	assert.Equal(t, len(collectionSchema.Fields), len(data))

	// Pass 2: same checks with array fields added to both the body and the schema.
	body, _ = generateRequestBodyWithArray(schemapb.DataType_Int64)
	collectionSchema = newCollectionSchemaWithArray(generateCollectionSchema(schemapb.DataType_Int64, false, true))
	err, rows = checkAndSetData(string(body), collectionSchema)
	assert.Equal(t, nil, err)
	assert.Equal(t, true, compareRows(rows, newRowsWithArray(generateRawRows(schemapb.DataType_Int64)), compareRow))
	data, err = anyToColumns(rows, collectionSchema, true)
	assert.Equal(t, nil, err)
	assert.Equal(t, len(collectionSchema.Fields), len(data))
}
func TestVector(t *testing.T) {
@ -1287,7 +1460,7 @@ func TestVector(t *testing.T) {
sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001},
}
body, _ := wrapRequestBody([]map[string]interface{}{row1, row2, row3})
primaryField := generatePrimaryField(schemapb.DataType_Int64)
primaryField := generatePrimaryField(schemapb.DataType_Int64, false)
floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
floatVectorField.Name = floatVector
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)
@ -1316,7 +1489,7 @@ func TestVector(t *testing.T) {
// all test sparse rows have 2 elements, each should be of 8 bytes
assert.Equal(t, 16, len(row[sparseFloatVector].([]byte)))
}
data, err := anyToColumns(rows, collectionSchema)
data, err := anyToColumns(rows, collectionSchema, true)
assert.Equal(t, nil, err)
assert.Equal(t, len(collectionSchema.Fields)+1, len(data))

View File

@ -33,6 +33,7 @@ class TestInsertVector(TestBase):
"dimension": dim,
"primaryField": primary_field,
"vectorField": vector_field,
"autoID":True,
}
rsp = self.collection_client.collection_create(collection_payload)
assert rsp['code'] == 200