enhance: support upsert when autoID == true in the RESTful API (#37072)

Signed-off-by: lixinguo <xinguo.li@zilliz.com>
Co-authored-by: lixinguo <xinguo.li@zilliz.com>
This commit is contained in:
smellthemoon 2024-10-25 14:33:39 +08:00 committed by GitHub
parent 6e90f9e8d9
commit 84d48b498b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 48 additions and 44 deletions

View File

@ -57,7 +57,7 @@ var DefaultShowCollectionsResp = milvuspb.ShowCollectionsResponse{
var DefaultDescCollectionResp = milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}
@ -767,9 +767,9 @@ func TestInsertForDataType(t *testing.T) {
paramtable.Init()
paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true")
schemas := map[string]*schemapb.CollectionSchema{
"[success]kinds of data type": newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64)),
"[success]with dynamic field": withDynamicField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64))),
"[success]with array fields": withArrayField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64))),
"[success]kinds of data type": newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false)),
"[success]with dynamic field": withDynamicField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false))),
"[success]with array fields": withArrayField(newCollectionSchema(generateCollectionSchema(schemapb.DataType_Int64, false))),
}
for name, schema := range schemas {
t.Run(name, func(t *testing.T) {
@ -840,7 +840,7 @@ func TestReturnInt64(t *testing.T) {
}
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -871,7 +871,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -902,7 +902,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: false, Accept-Type-Allow-Int64: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -934,7 +934,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: false, Accept-Type-Allow-Int64: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -967,7 +967,7 @@ func TestReturnInt64(t *testing.T) {
paramtable.Get().Save(proxy.Params.HTTPCfg.AcceptTypeAllowInt64.Key, "true")
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -998,7 +998,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: true", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -1029,7 +1029,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[insert]httpCfg.allow: true, Accept-Type-Allow-Int64: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
@ -1061,7 +1061,7 @@ func TestReturnInt64(t *testing.T) {
for _, dataType := range schemas {
t.Run("[upsert]httpCfg.allow: true, Accept-Type-Allow-Int64: false", func(t *testing.T) {
schema := newCollectionSchema(generateCollectionSchema(dataType))
schema := newCollectionSchema(generateCollectionSchema(dataType, false))
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,

View File

@ -804,11 +804,6 @@ func (h *HandlersV2) upsert(ctx context.Context, c *gin.Context, anyReq any, dbN
if err != nil {
return nil, err
}
if collSchema.AutoID {
err := merr.WrapErrParameterInvalid("autoID: false", "autoID: true", "cannot upsert an autoID collection")
HTTPAbortReturn(c, http.StatusOK, gin.H{HTTPReturnCode: merr.Code(err), HTTPReturnMessage: err.Error()})
return nil, err
}
body, _ := c.Get(gin.BodyBytesKey)
var validDataMap map[string][]bool
err, httpReq.Data, validDataMap = checkAndSetData(string(body.([]byte)), collSchema)

View File

@ -1089,7 +1089,7 @@ func TestMethodGet(t *testing.T) {
mp.EXPECT().HasCollection(mock.Anything, mock.Anything).Return(&milvuspb.BoolResponse{Status: commonErrorStatus}, nil).Once()
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Twice()
@ -1521,7 +1521,7 @@ func TestDML(t *testing.T) {
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Times(6)
@ -1541,6 +1541,13 @@ func TestDML(t *testing.T) {
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once()
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_StrId{StrId: &schemapb.StringArray{Data: []string{}}}}}, nil).Once()
mp.EXPECT().Delete(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus}, nil).Once()
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64, true),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Once()
mp.EXPECT().Upsert(mock.Anything, mock.Anything).Return(&milvuspb.MutationResult{Status: commonSuccessStatus, UpsertCnt: int64(0), IDs: &schemapb.IDs{IdField: &schemapb.IDs_IntId{IntId: &schemapb.LongArray{Data: []int64{}}}}}, nil).Once()
testEngine := initHTTPServerV2(mp, false)
queryTestCases := []requestBodyTestCase{}
queryTestCases = append(queryTestCases, requestBodyTestCase{
@ -1615,6 +1622,11 @@ func TestDML(t *testing.T) {
errMsg: "",
errCode: 65535,
})
// upsert when autoid==true
queryTestCases = append(queryTestCases, requestBodyTestCase{
path: UpsertAction,
requestBody: []byte(`{"collectionName": "book", "data": [{"book_id": 0, "word_count": 0, "book_intro": [0.11825, 0.6]}]}`),
})
for _, testcase := range queryTestCases {
t.Run(testcase.path, func(t *testing.T) {
@ -1653,7 +1665,7 @@ func TestAllowInt64(t *testing.T) {
})
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Twice()
@ -1689,7 +1701,7 @@ func TestSearchV2(t *testing.T) {
mp := mocks.NewMockProxy(t)
mp.EXPECT().DescribeCollection(mock.Anything, mock.Anything).Return(&milvuspb.DescribeCollectionResponse{
CollectionName: DefaultCollectionName,
Schema: generateCollectionSchema(schemapb.DataType_Int64),
Schema: generateCollectionSchema(schemapb.DataType_Int64, false),
ShardsNum: ShardNumDefault,
Status: &StatusSuccess,
}, nil).Times(11)
@ -1713,7 +1725,7 @@ func TestSearchV2(t *testing.T) {
Scores: DefaultScores,
}}, nil).Once()
mp.EXPECT().HybridSearch(mock.Anything, mock.Anything).Return(&milvuspb.SearchResults{Status: commonSuccessStatus, Results: &schemapb.SearchResultData{TopK: int64(0)}}, nil).Times(3)
collSchema := generateCollectionSchema(schemapb.DataType_Int64)
collSchema := generateCollectionSchema(schemapb.DataType_Int64, false)
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)
binaryVectorField.Name = "binaryVector"
float16VectorField := generateVectorFieldSchema(schemapb.DataType_Float16Vector)

View File

@ -185,7 +185,7 @@ type SubSearchReq struct {
MetricType string `json:"metricType"`
Limit int32 `json:"limit"`
Offset int32 `json:"offset"`
SearchParams searchParams `json:"searchParams"`
SearchParams searchParams `json:"params"`
}
type HybridSearchReq struct {

View File

@ -281,9 +281,6 @@ func checkAndSetData(body string, collSchema *schemapb.CollectionSchema) (error,
dataString := gjson.Get(data.Raw, fieldName).String()
if field.IsPrimaryKey && field.AutoID {
if dataString != "" {
return merr.WrapErrParameterInvalid("", "set primary key but autoID == true"), reallyDataArray, validDataMap
}
continue
}

View File

@ -28,14 +28,14 @@ const (
var DefaultScores = []float32{0.01, 0.04, 0.09}
func generatePrimaryField(datatype schemapb.DataType) *schemapb.FieldSchema {
func generatePrimaryField(datatype schemapb.DataType, autoID bool) *schemapb.FieldSchema {
return &schemapb.FieldSchema{
FieldID: common.StartOfUserFieldID,
Name: FieldBookID,
IsPrimaryKey: true,
Description: "",
DataType: datatype,
AutoID: false,
AutoID: autoID,
}
}
@ -93,8 +93,8 @@ func generateVectorFieldSchema(dataType schemapb.DataType) *schemapb.FieldSchema
}
}
func generateCollectionSchema(primaryDataType schemapb.DataType) *schemapb.CollectionSchema {
primaryField := generatePrimaryField(primaryDataType)
func generateCollectionSchema(primaryDataType schemapb.DataType, autoID bool) *schemapb.CollectionSchema {
primaryField := generatePrimaryField(primaryDataType, autoID)
vectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
vectorField.Name = FieldBookIntro
return &schemapb.CollectionSchema{
@ -116,7 +116,7 @@ func generateCollectionSchema(primaryDataType schemapb.DataType) *schemapb.Colle
}
func generateDocInDocOutCollectionSchema(primaryDataType schemapb.DataType) *schemapb.CollectionSchema {
primaryField := generatePrimaryField(primaryDataType)
primaryField := generatePrimaryField(primaryDataType, false)
vectorField := generateVectorFieldSchema(schemapb.DataType_SparseFloatVector)
vectorField.Name = FieldBookIntro
vectorField.IsFunctionOutput = true
@ -382,7 +382,7 @@ func generateQueryResult64(withDistance bool) []map[string]interface{} {
}
func TestPrintCollectionDetails(t *testing.T) {
coll := generateCollectionSchema(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false)
indexes := generateIndexes()
assert.Equal(t, []gin.H{
{
@ -514,8 +514,8 @@ func TestPrintCollectionDetails(t *testing.T) {
}
func TestPrimaryField(t *testing.T) {
coll := generateCollectionSchema(schemapb.DataType_Int64)
primaryField := generatePrimaryField(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false)
primaryField := generatePrimaryField(schemapb.DataType_Int64, false)
field, ok := getPrimaryField(coll)
assert.Equal(t, true, ok)
assert.EqualExportedValues(t, primaryField, field)
@ -532,13 +532,13 @@ func TestPrimaryField(t *testing.T) {
assert.Equal(t, nil, err)
assert.Equal(t, "book_id in [1,2,3]", filter)
primaryField = generatePrimaryField(schemapb.DataType_VarChar)
primaryField = generatePrimaryField(schemapb.DataType_VarChar, false)
jsonStr = "{\"id\": [\"1\", \"2\", \"3\"]}"
idStr = gjson.Get(jsonStr, "id")
rangeStr, err = convertRange(primaryField, idStr)
assert.Equal(t, nil, err)
assert.Equal(t, `"1","2","3"`, rangeStr)
coll2 := generateCollectionSchema(schemapb.DataType_VarChar)
coll2 := generateCollectionSchema(schemapb.DataType_VarChar, false)
filter, err = checkGetPrimaryKey(coll2, idStr)
assert.Equal(t, nil, err)
assert.Equal(t, `book_id in ["1","2","3"]`, filter)
@ -547,7 +547,7 @@ func TestPrimaryField(t *testing.T) {
func TestInsertWithDynamicFields(t *testing.T) {
body := "{\"data\": {\"id\": 0, \"book_id\": 1, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"classified\": false, \"databaseID\": null}}"
req := InsertReq{}
coll := generateCollectionSchema(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false)
var err error
err, req.Data, _ = checkAndSetData(body, coll)
assert.Equal(t, nil, err)
@ -564,7 +564,7 @@ func TestInsertWithDynamicFields(t *testing.T) {
func TestInsertWithoutVector(t *testing.T) {
body := "{\"data\": {}}"
var err error
primaryField := generatePrimaryField(schemapb.DataType_Int64)
primaryField := generatePrimaryField(schemapb.DataType_Int64, false)
primaryField.AutoID = true
floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
floatVectorField.Name = "floatVector"
@ -615,7 +615,7 @@ func TestInsertWithoutVector(t *testing.T) {
func TestInsertWithInt64(t *testing.T) {
arrayFieldName := "array-int64"
body := "{\"data\": {\"book_id\": 9999999999999999, \"book_intro\": [0.1, 0.2], \"word_count\": 2, \"" + arrayFieldName + "\": [9999999999999999]}}"
coll := generateCollectionSchema(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false)
coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
Name: arrayFieldName,
DataType: schemapb.DataType_Array,
@ -632,7 +632,7 @@ func TestInsertWithInt64(t *testing.T) {
func TestInsertWithNullableField(t *testing.T) {
arrayFieldName := "array-int64"
coll := generateCollectionSchema(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false)
coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
Name: arrayFieldName,
DataType: schemapb.DataType_Array,
@ -664,7 +664,7 @@ func TestInsertWithNullableField(t *testing.T) {
func TestInsertWithDefaultValueField(t *testing.T) {
arrayFieldName := "array-int64"
coll := generateCollectionSchema(schemapb.DataType_Int64)
coll := generateCollectionSchema(schemapb.DataType_Int64, false)
coll.Fields = append(coll.Fields, &schemapb.FieldSchema{
Name: arrayFieldName,
DataType: schemapb.DataType_Array,
@ -1587,7 +1587,7 @@ func newRowsWithArray(results []map[string]interface{}) []map[string]interface{}
func TestArray(t *testing.T) {
body, _ := generateRequestBody(schemapb.DataType_Int64)
collectionSchema := generateCollectionSchema(schemapb.DataType_Int64)
collectionSchema := generateCollectionSchema(schemapb.DataType_Int64, false)
err, rows, validRows := checkAndSetData(string(body), collectionSchema)
assert.Equal(t, nil, err)
assert.Equal(t, 0, len(validRows))
@ -1597,7 +1597,7 @@ func TestArray(t *testing.T) {
assert.Equal(t, len(collectionSchema.Fields)+1, len(data))
body, _ = generateRequestBodyWithArray(schemapb.DataType_Int64)
collectionSchema = newCollectionSchemaWithArray(generateCollectionSchema(schemapb.DataType_Int64))
collectionSchema = newCollectionSchemaWithArray(generateCollectionSchema(schemapb.DataType_Int64, false))
err, rows, validRows = checkAndSetData(string(body), collectionSchema)
assert.Equal(t, nil, err)
assert.Equal(t, 0, len(validRows))
@ -1638,7 +1638,7 @@ func TestVector(t *testing.T) {
sparseFloatVector: map[uint32]float32{987621: 32190.31, 32189: 0.0001},
}
body, _ := wrapRequestBody([]map[string]interface{}{row1, row2, row3})
primaryField := generatePrimaryField(schemapb.DataType_Int64)
primaryField := generatePrimaryField(schemapb.DataType_Int64, false)
floatVectorField := generateVectorFieldSchema(schemapb.DataType_FloatVector)
floatVectorField.Name = floatVector
binaryVectorField := generateVectorFieldSchema(schemapb.DataType_BinaryVector)