milvus/internal/querynode/result_test.go
Cai Yudong e3d1624e97
Handle distance Inf correctly (#21828)
Signed-off-by: yudong.cai <yudong.cai@zilliz.com>
2023-02-01 14:59:51 +08:00

528 lines
18 KiB
Go

// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package querynode
import (
"context"
"testing"
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/common"
"github.com/milvus-io/milvus/internal/proto/internalpb"
"github.com/milvus-io/milvus/internal/proto/segcorepb"
"github.com/milvus-io/milvus/internal/util/typeutil"
)
func TestResult_mergeSegcoreRetrieveResults(t *testing.T) {
const (
Dim = 8
Int64FieldName = "Int64Field"
FloatVectorFieldName = "FloatVectorField"
Int64FieldID = common.StartOfUserFieldID + 1
FloatVectorFieldID = common.StartOfUserFieldID + 2
)
Int64Array := []int64{11, 22}
FloatVector := []float32{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 11.0, 22.0, 33.0, 44.0, 55.0, 66.0, 77.0, 88.0}
var fieldDataArray1 []*schemapb.FieldData
fieldDataArray1 = append(fieldDataArray1, genFieldData(Int64FieldName, Int64FieldID, schemapb.DataType_Int64, Int64Array[0:2], 1))
fieldDataArray1 = append(fieldDataArray1, genFieldData(FloatVectorFieldName, FloatVectorFieldID, schemapb.DataType_FloatVector, FloatVector[0:16], Dim))
var fieldDataArray2 []*schemapb.FieldData
fieldDataArray2 = append(fieldDataArray2, genFieldData(Int64FieldName, Int64FieldID, schemapb.DataType_Int64, Int64Array[0:2], 1))
fieldDataArray2 = append(fieldDataArray2, genFieldData(FloatVectorFieldName, FloatVectorFieldID, schemapb.DataType_FloatVector, FloatVector[0:16], Dim))
t.Run("test skip dupPK 2", func(t *testing.T) {
result1 := &segcorepb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{0, 1},
},
},
},
Offset: []int64{0, 1},
FieldsData: fieldDataArray1,
}
result2 := &segcorepb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{0, 1},
},
},
},
Offset: []int64{0, 1},
FieldsData: fieldDataArray2,
}
result, err := mergeSegcoreRetrieveResults(context.Background(), []*segcorepb.RetrieveResults{result1, result2}, typeutil.Unlimited)
assert.NoError(t, err)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, []int64{0, 1}, result.GetIds().GetIntId().GetData())
assert.Equal(t, Int64Array, result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, FloatVector, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
})
t.Run("test nil results", func(t *testing.T) {
ret, err := mergeSegcoreRetrieveResults(context.Background(), nil, typeutil.Unlimited)
assert.NoError(t, err)
assert.Empty(t, ret.GetIds())
assert.Empty(t, ret.GetFieldsData())
})
t.Run("test no offset", func(t *testing.T) {
r := &segcorepb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{0, 1},
},
},
},
FieldsData: fieldDataArray1,
}
ret, err := mergeSegcoreRetrieveResults(context.Background(), []*segcorepb.RetrieveResults{r}, typeutil.Unlimited)
assert.NoError(t, err)
assert.Empty(t, ret.GetIds())
assert.Empty(t, ret.GetFieldsData())
})
t.Run("test merge", func(t *testing.T) {
r1 := &segcorepb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{1, 3},
},
},
},
Offset: []int64{0, 1},
FieldsData: fieldDataArray1,
}
r2 := &segcorepb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{2, 4},
},
},
},
Offset: []int64{0, 1},
FieldsData: fieldDataArray2,
}
resultFloat := []float32{
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
11.0, 22.0, 33.0, 44.0, 55.0, 66.0, 77.0, 88.0,
11.0, 22.0, 33.0, 44.0, 55.0, 66.0, 77.0, 88.0}
t.Run("test limited", func(t *testing.T) {
tests := []struct {
description string
limit int64
}{
{"limit 1", 1},
{"limit 2", 2},
{"limit 3", 3},
{"limit 4", 4},
}
resultIDs := []int64{1, 2, 3, 4}
resultField0 := []int64{11, 11, 22, 22}
for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
result, err := mergeSegcoreRetrieveResults(context.Background(), []*segcorepb.RetrieveResults{r1, r2}, test.limit)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, int(test.limit), len(result.GetIds().GetIntId().GetData()))
assert.Equal(t, resultIDs[0:test.limit], result.GetIds().GetIntId().GetData())
assert.Equal(t, resultField0[0:test.limit], result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, resultFloat[0:test.limit*Dim], result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
assert.NoError(t, err)
})
}
})
t.Run("test int ID", func(t *testing.T) {
result, err := mergeSegcoreRetrieveResults(context.Background(), []*segcorepb.RetrieveResults{r1, r2}, typeutil.Unlimited)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, []int64{1, 2, 3, 4}, result.GetIds().GetIntId().GetData())
assert.Equal(t, []int64{11, 11, 22, 22}, result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, resultFloat, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
assert.NoError(t, err)
})
t.Run("test string ID", func(t *testing.T) {
r1.Ids = &schemapb.IDs{
IdField: &schemapb.IDs_StrId{
StrId: &schemapb.StringArray{
Data: []string{"a", "c"},
}}}
r2.Ids = &schemapb.IDs{
IdField: &schemapb.IDs_StrId{
StrId: &schemapb.StringArray{
Data: []string{"b", "d"},
}}}
result, err := mergeSegcoreRetrieveResults(context.Background(), []*segcorepb.RetrieveResults{r1, r2}, typeutil.Unlimited)
assert.NoError(t, err)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, []string{"a", "b", "c", "d"}, result.GetIds().GetStrId().GetData())
assert.Equal(t, []int64{11, 11, 22, 22}, result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, resultFloat, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
assert.NoError(t, err)
})
})
}
func TestResult_mergeInternalRetrieveResults(t *testing.T) {
const (
Dim = 8
Int64FieldName = "Int64Field"
FloatVectorFieldName = "FloatVectorField"
Int64FieldID = common.StartOfUserFieldID + 1
FloatVectorFieldID = common.StartOfUserFieldID + 2
)
Int64Array := []int64{11, 22}
FloatVector := []float32{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 11.0, 22.0, 33.0, 44.0, 55.0, 66.0, 77.0, 88.0}
var fieldDataArray1 []*schemapb.FieldData
fieldDataArray1 = append(fieldDataArray1, genFieldData(Int64FieldName, Int64FieldID, schemapb.DataType_Int64, Int64Array[0:2], 1))
fieldDataArray1 = append(fieldDataArray1, genFieldData(FloatVectorFieldName, FloatVectorFieldID, schemapb.DataType_FloatVector, FloatVector[0:16], Dim))
var fieldDataArray2 []*schemapb.FieldData
fieldDataArray2 = append(fieldDataArray2, genFieldData(Int64FieldName, Int64FieldID, schemapb.DataType_Int64, Int64Array[0:2], 1))
fieldDataArray2 = append(fieldDataArray2, genFieldData(FloatVectorFieldName, FloatVectorFieldID, schemapb.DataType_FloatVector, FloatVector[0:16], Dim))
t.Run("test skip dupPK 2", func(t *testing.T) {
result1 := &internalpb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{0, 1},
},
},
},
FieldsData: fieldDataArray1,
}
result2 := &internalpb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{0, 1},
},
},
},
FieldsData: fieldDataArray2,
}
result, err := mergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{result1, result2}, typeutil.Unlimited)
assert.NoError(t, err)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, []int64{0, 1}, result.GetIds().GetIntId().GetData())
assert.Equal(t, Int64Array, result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, FloatVector, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
})
t.Run("test nil results", func(t *testing.T) {
ret, err := mergeInternalRetrieveResult(context.Background(), nil, typeutil.Unlimited)
assert.NoError(t, err)
assert.Empty(t, ret.GetIds())
assert.Empty(t, ret.GetFieldsData())
})
t.Run("test timestamp decided", func(t *testing.T) {
ret1 := &internalpb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{0, 1},
}},
},
FieldsData: []*schemapb.FieldData{
genFieldData(common.TimeStampFieldName, common.TimeStampField, schemapb.DataType_Int64,
[]int64{1, 2}, 1),
genFieldData(Int64FieldName, Int64FieldID, schemapb.DataType_Int64,
[]int64{3, 4}, 1),
},
}
ret2 := &internalpb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{0, 1},
}},
},
FieldsData: []*schemapb.FieldData{
genFieldData(common.TimeStampFieldName, common.TimeStampField, schemapb.DataType_Int64,
[]int64{5, 6}, 1),
genFieldData(Int64FieldName, Int64FieldID, schemapb.DataType_Int64,
[]int64{7, 8}, 1),
},
}
result, err := mergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{ret1, ret2}, typeutil.Unlimited)
assert.NoError(t, err)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, []int64{0, 1}, result.GetIds().GetIntId().GetData())
assert.Equal(t, []int64{7, 8}, result.GetFieldsData()[1].GetScalars().GetLongData().Data)
})
t.Run("test merge", func(t *testing.T) {
r1 := &internalpb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{1, 3},
},
},
},
FieldsData: fieldDataArray1,
}
r2 := &internalpb.RetrieveResults{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{2, 4},
},
},
},
FieldsData: fieldDataArray2,
}
resultFloat := []float32{
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
11.0, 22.0, 33.0, 44.0, 55.0, 66.0, 77.0, 88.0,
11.0, 22.0, 33.0, 44.0, 55.0, 66.0, 77.0, 88.0}
t.Run("test limited", func(t *testing.T) {
tests := []struct {
description string
limit int64
}{
{"limit 1", 1},
{"limit 2", 2},
{"limit 3", 3},
{"limit 4", 4},
}
resultIDs := []int64{1, 2, 3, 4}
resultField0 := []int64{11, 11, 22, 22}
for _, test := range tests {
t.Run(test.description, func(t *testing.T) {
result, err := mergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{r1, r2}, test.limit)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, int(test.limit), len(result.GetIds().GetIntId().GetData()))
assert.Equal(t, resultIDs[0:test.limit], result.GetIds().GetIntId().GetData())
assert.Equal(t, resultField0[0:test.limit], result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, resultFloat[0:test.limit*Dim], result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
assert.NoError(t, err)
})
}
})
t.Run("test int ID", func(t *testing.T) {
result, err := mergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{r1, r2}, typeutil.Unlimited)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, []int64{1, 2, 3, 4}, result.GetIds().GetIntId().GetData())
assert.Equal(t, []int64{11, 11, 22, 22}, result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, resultFloat, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
assert.NoError(t, err)
})
t.Run("test string ID", func(t *testing.T) {
r1.Ids = &schemapb.IDs{
IdField: &schemapb.IDs_StrId{
StrId: &schemapb.StringArray{
Data: []string{"a", "c"},
},
},
}
r2.Ids = &schemapb.IDs{
IdField: &schemapb.IDs_StrId{
StrId: &schemapb.StringArray{
Data: []string{"b", "d"},
},
},
}
result, err := mergeInternalRetrieveResult(context.Background(), []*internalpb.RetrieveResults{r1, r2}, typeutil.Unlimited)
assert.NoError(t, err)
assert.Equal(t, 2, len(result.GetFieldsData()))
assert.Equal(t, []string{"a", "b", "c", "d"}, result.GetIds().GetStrId().GetData())
assert.Equal(t, []int64{11, 11, 22, 22}, result.GetFieldsData()[0].GetScalars().GetLongData().Data)
assert.InDeltaSlice(t, resultFloat, result.FieldsData[1].GetVectors().GetFloatVector().Data, 10e-10)
assert.NoError(t, err)
})
})
}
func TestResult_reduceSearchResultData(t *testing.T) {
const (
nq = 1
topk = 4
metricType = "L2"
)
t.Run("case1", func(t *testing.T) {
ids := []int64{1, 2, 3, 4}
scores := []float32{-1.0, -2.0, -3.0, -4.0}
topks := []int64{int64(len(ids))}
data1 := genSearchResultData(nq, topk, ids, scores, topks)
data2 := genSearchResultData(nq, topk, ids, scores, topks)
dataArray := make([]*schemapb.SearchResultData, 0)
dataArray = append(dataArray, data1)
dataArray = append(dataArray, data2)
res, err := reduceSearchResultData(context.TODO(), dataArray, nq, topk)
assert.Nil(t, err)
assert.Equal(t, ids, res.Ids.GetIntId().Data)
assert.Equal(t, scores, res.Scores)
})
t.Run("case2", func(t *testing.T) {
ids1 := []int64{1, 2, 3, 4}
scores1 := []float32{-1.0, -2.0, -3.0, -4.0}
topks1 := []int64{int64(len(ids1))}
ids2 := []int64{5, 1, 3, 4}
scores2 := []float32{-1.0, -1.0, -3.0, -4.0}
topks2 := []int64{int64(len(ids2))}
data1 := genSearchResultData(nq, topk, ids1, scores1, topks1)
data2 := genSearchResultData(nq, topk, ids2, scores2, topks2)
dataArray := make([]*schemapb.SearchResultData, 0)
dataArray = append(dataArray, data1)
dataArray = append(dataArray, data2)
res, err := reduceSearchResultData(context.TODO(), dataArray, nq, topk)
assert.Nil(t, err)
assert.ElementsMatch(t, []int64{1, 5, 2, 3}, res.Ids.GetIntId().Data)
})
}
func TestResult_selectSearchResultData_int(t *testing.T) {
type args struct {
dataArray []*schemapb.SearchResultData
resultOffsets [][]int64
offsets []int64
topk int64
nq int64
qi int64
}
t.Run("Integer ID", func(t *testing.T) {
tests := []struct {
name string
args args
want int
}{
{
args: args{
dataArray: []*schemapb.SearchResultData{
{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{11, 9, 7, 5, 3, 1},
},
},
},
Scores: []float32{1.1, 0.9, 0.7, 0.5, 0.3, 0.1},
Topks: []int64{2, 2, 2},
},
{
Ids: &schemapb.IDs{
IdField: &schemapb.IDs_IntId{
IntId: &schemapb.LongArray{
Data: []int64{12, 10, 8, 6, 4, 2},
},
},
},
Scores: []float32{1.2, 1.0, 0.8, 0.6, 0.4, 0.2},
Topks: []int64{2, 2, 2},
},
},
resultOffsets: [][]int64{{0, 2, 4}, {0, 2, 4}},
offsets: []int64{0, 1},
topk: 2,
nq: 3,
qi: 0,
},
want: 0,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := selectSearchResultData(tt.args.dataArray, tt.args.resultOffsets, tt.args.offsets, tt.args.qi); got != tt.want {
t.Errorf("selectSearchResultData() = %v, want %v", got, tt.want)
}
})
}
})
//t.Run("Integer ID with bad score", func(t *testing.T) {
// tests := []struct {
// name string
// args args
// want int
// }{
// {
// args: args{
// dataArray: []*schemapb.SearchResultData{
// {
// Ids: &schemapb.IDs{
// IdField: &schemapb.IDs_IntId{
// IntId: &schemapb.LongArray{
// Data: []int64{11, 9, 7, 5, 3, 1},
// },
// },
// },
// Scores: []float32{-math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32},
// Topks: []int64{2, 2, 2},
// },
// {
// Ids: &schemapb.IDs{
// IdField: &schemapb.IDs_IntId{
// IntId: &schemapb.LongArray{
// Data: []int64{12, 10, 8, 6, 4, 2},
// },
// },
// },
// Scores: []float32{-math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32, -math.MaxFloat32},
// Topks: []int64{2, 2, 2},
// },
// },
// resultOffsets: [][]int64{{0, 2, 4}, {0, 2, 4}},
// offsets: []int64{0, 1},
// topk: 2,
// nq: 3,
// qi: 0,
// },
// want: -1,
// },
// }
// for _, tt := range tests {
// t.Run(tt.name, func(t *testing.T) {
// if got := selectSearchResultData(tt.args.dataArray, tt.args.resultOffsets, tt.args.offsets, tt.args.qi); got != tt.want {
// t.Errorf("selectSearchResultData() = %v, want %v", got, tt.want)
// }
// })
// }
//})
}