milvus/internal/util/indexcgowrapper/codec_index_test.go
xige-16 158787811e
Move assemble/disassemble func to core (#19420)
Signed-off-by: xige-16 <xi.ge@zilliz.com>

Signed-off-by: xige-16 <xi.ge@zilliz.com>
2022-10-16 21:17:25 +08:00

326 lines
7.6 KiB
Go

//go:build linux
// +build linux
package indexcgowrapper
import (
"math/rand"
"strconv"
"testing"
"github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/schemapb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/internal/storage"
"github.com/milvus-io/milvus/internal/util/funcutil"
)
// indexTestCase bundles the arguments used to construct one index under test.
type indexTestCase struct {
	// dtype selects both the index data type and the kind of field data
	// generated for the build step.
	dtype schemapb.DataType
	// typeParams and indexParams are forwarded unchanged to NewCgoIndex.
	typeParams, indexParams map[string]string
}
// generateBoolArray returns numRows booleans drawn from math/rand's global
// source; an element is true when the drawn integer is even.
func generateBoolArray(numRows int) []bool {
	flags := make([]bool, 0, numRows)
	for len(flags) < numRows {
		flags = append(flags, rand.Int()%2 == 0)
	}
	return flags
}
// generateInt8Array returns numRows values obtained by truncating integers
// from math/rand's global source to int8.
func generateInt8Array(numRows int) []int8 {
	vals := make([]int8, 0, numRows)
	for len(vals) < numRows {
		vals = append(vals, int8(rand.Int()))
	}
	return vals
}
// generateInt16Array returns numRows values obtained by truncating integers
// from math/rand's global source to int16.
func generateInt16Array(numRows int) []int16 {
	vals := make([]int16, 0, numRows)
	for len(vals) < numRows {
		vals = append(vals, int16(rand.Int()))
	}
	return vals
}
// generateInt32Array returns numRows values obtained by truncating integers
// from math/rand's global source to int32.
func generateInt32Array(numRows int) []int32 {
	vals := make([]int32, 0, numRows)
	for len(vals) < numRows {
		vals = append(vals, int32(rand.Int()))
	}
	return vals
}
// generateInt64Array returns numRows integers from math/rand's global source,
// converted to int64.
func generateInt64Array(numRows int) []int64 {
	vals := make([]int64, 0, numRows)
	for len(vals) < numRows {
		vals = append(vals, int64(rand.Int()))
	}
	return vals
}
// generateFloat32Array returns numRows values produced by rand.Float32 on
// math/rand's global source.
func generateFloat32Array(numRows int) []float32 {
	vals := make([]float32, 0, numRows)
	for len(vals) < numRows {
		vals = append(vals, rand.Float32())
	}
	return vals
}
// generateFloat64Array returns numRows values produced by rand.Float64 on
// math/rand's global source.
func generateFloat64Array(numRows int) []float64 {
	vals := make([]float64, 0, numRows)
	for len(vals) < numRows {
		vals = append(vals, rand.Float64())
	}
	return vals
}
// generateStringArray returns numRows strings, each obtained from a separate
// call to funcutil.GenRandomStr.
func generateStringArray(numRows int) []string {
	strs := make([]string, 0, numRows)
	for len(strs) < numRows {
		strs = append(strs, funcutil.GenRandomStr())
	}
	return strs
}
// generateFloatVectors returns a flat slice of numRows*dim values produced by
// rand.Float32, i.e. numRows vectors of dim components laid out back to back.
func generateFloatVectors(numRows, dim int) []float32 {
	want := numRows * dim
	flat := make([]float32, 0, want)
	for len(flat) < want {
		flat = append(flat, rand.Float32())
	}
	return flat
}
// generateBinaryVectors returns numRows*dim/8 random bytes (integer division),
// filled by rand.Read from math/rand's global source.
// NOTE(review): presumably dim is a multiple of 8 so no bits are dropped by
// the division — confirm against callers.
//
// It panics if rand.Read reports an error.
func generateBinaryVectors(numRows, dim int) []byte {
	buf := make([]byte, numRows*dim/8)
	if _, err := rand.Read(buf); err != nil {
		panic(err)
	}
	return buf
}
// genFieldData creates randomly filled field data matching dtype.
//
// Scalar and string types get numRows elements; dim is only used for the two
// vector types, where it is stored on the result and passed to the vector
// generator. String and VarChar both produce storage.StringFieldData.
// Any other dtype yields nil.
func genFieldData(dtype schemapb.DataType, numRows, dim int) storage.FieldData {
	switch dtype {
	case schemapb.DataType_Bool:
		return &storage.BoolFieldData{Data: generateBoolArray(numRows)}
	case schemapb.DataType_Int8:
		return &storage.Int8FieldData{Data: generateInt8Array(numRows)}
	case schemapb.DataType_Int16:
		return &storage.Int16FieldData{Data: generateInt16Array(numRows)}
	case schemapb.DataType_Int32:
		return &storage.Int32FieldData{Data: generateInt32Array(numRows)}
	case schemapb.DataType_Int64:
		return &storage.Int64FieldData{Data: generateInt64Array(numRows)}
	case schemapb.DataType_Float:
		return &storage.FloatFieldData{Data: generateFloat32Array(numRows)}
	case schemapb.DataType_Double:
		return &storage.DoubleFieldData{Data: generateFloat64Array(numRows)}
	case schemapb.DataType_String, schemapb.DataType_VarChar:
		return &storage.StringFieldData{Data: generateStringArray(numRows)}
	case schemapb.DataType_BinaryVector:
		return &storage.BinaryVectorFieldData{
			Dim:  dim,
			Data: generateBinaryVectors(numRows, dim),
		}
	case schemapb.DataType_FloatVector:
		return &storage.FloatVectorFieldData{
			Dim:  dim,
			Data: generateFloatVectors(numRows, dim),
		}
	}
	// Unsupported data type.
	return nil
}
// genScalarIndexCases returns one test case per scalar index type
// ("inverted_index", then "flat") for the given dtype. typeParams is left nil.
func genScalarIndexCases(dtype schemapb.DataType) []indexTestCase {
	indexTypes := [...]string{"inverted_index", "flat"}
	cases := make([]indexTestCase, 0, len(indexTypes))
	for _, indexType := range indexTypes {
		cases = append(cases, indexTestCase{
			dtype:       dtype,
			indexParams: map[string]string{"index_type": indexType},
		})
	}
	return cases
}
// genStringIndexCases returns one test case per string index type
// ("inverted_index", then "marisa-trie") for the given dtype. typeParams is
// left nil.
func genStringIndexCases(dtype schemapb.DataType) []indexTestCase {
	indexTypes := [...]string{"inverted_index", "marisa-trie"}
	cases := make([]indexTestCase, 0, len(indexTypes))
	for _, indexType := range indexTypes {
		cases = append(cases, indexTestCase{
			dtype:       dtype,
			indexParams: map[string]string{"index_type": indexType},
		})
	}
	return cases
}
// genFloatVecIndexCases returns two float-vector test cases for dtype, both
// using the L2 metric: IndexFaissIVFPQ (with dim, nlist, m and nbits) followed
// by IndexFaissIVFFlat (with dim and nlist). Numeric parameters come from
// package-level values and are passed as decimal strings; typeParams is nil.
func genFloatVecIndexCases(dtype schemapb.DataType) []indexTestCase {
	ivfPQ := indexTestCase{
		dtype: dtype,
		indexParams: map[string]string{
			"index_type":  IndexFaissIVFPQ,
			"metric_type": L2,
			"dim":         strconv.Itoa(dim),
			"nlist":       strconv.Itoa(nlist),
			"m":           strconv.Itoa(m),
			"nbits":       strconv.Itoa(nbits),
		},
	}
	ivfFlat := indexTestCase{
		dtype: dtype,
		indexParams: map[string]string{
			"index_type":  IndexFaissIVFFlat,
			"metric_type": L2,
			"dim":         strconv.Itoa(dim),
			"nlist":       strconv.Itoa(nlist),
		},
	}
	return []indexTestCase{ivfPQ, ivfFlat}
}
// genBinaryVecIndexCases returns the single binary-vector test case for dtype:
// IndexFaissBinIVFFlat with the Jaccard metric and the package-level dim,
// nlist and nbits passed as decimal strings. typeParams is nil.
func genBinaryVecIndexCases(dtype schemapb.DataType) []indexTestCase {
	params := map[string]string{
		"index_type":  IndexFaissBinIVFFlat,
		"metric_type": Jaccard,
		"dim":         strconv.Itoa(dim),
		"nlist":       strconv.Itoa(nlist),
		"nbits":       strconv.Itoa(nbits),
	}
	return []indexTestCase{
		{dtype: dtype, indexParams: params},
	}
}
// genTypedIndexCase picks the test-case generator that corresponds to dtype
// and returns its cases; dtypes without a generator yield nil.
func genTypedIndexCase(dtype schemapb.DataType) []indexTestCase {
	switch dtype {
	// NOTE(review): String is grouped with the scalar cases while VarChar gets
	// the string-specific cases — confirm this split is intentional.
	case schemapb.DataType_Bool,
		schemapb.DataType_Int8,
		schemapb.DataType_Int16,
		schemapb.DataType_Int32,
		schemapb.DataType_Int64,
		schemapb.DataType_Float,
		schemapb.DataType_Double,
		schemapb.DataType_String:
		return genScalarIndexCases(dtype)
	case schemapb.DataType_VarChar:
		return genStringIndexCases(dtype)
	case schemapb.DataType_BinaryVector:
		return genBinaryVecIndexCases(dtype)
	case schemapb.DataType_FloatVector:
		return genFloatVecIndexCases(dtype)
	}
	return nil
}
// genIndexCase concatenates the test cases of every data type listed below,
// in listing order.
func genIndexCase() []indexTestCase {
	var cases []indexTestCase
	for _, dt := range []schemapb.DataType{
		schemapb.DataType_Bool,
		schemapb.DataType_Int8,
		schemapb.DataType_Int16,
		schemapb.DataType_Int32,
		schemapb.DataType_Int64,
		schemapb.DataType_Float,
		schemapb.DataType_Double,
		schemapb.DataType_String,
		schemapb.DataType_VarChar,
		schemapb.DataType_BinaryVector,
		schemapb.DataType_FloatVector,
	} {
		cases = append(cases, genTypedIndexCase(dt)...)
	}
	return cases
}
// genStorageConfig returns a new StorageConfig populated from Params.MinioCfg.
// Params.Init is run through InitOnce before the values are read.
func genStorageConfig() *indexpb.StorageConfig {
	InitOnce.Do(func() {
		Params.Init()
	})

	cfg := new(indexpb.StorageConfig)
	cfg.Address = Params.MinioCfg.Address
	cfg.AccessKeyID = Params.MinioCfg.AccessKeyID
	cfg.SecretAccessKey = Params.MinioCfg.SecretAccessKey
	cfg.BucketName = Params.MinioCfg.BucketName
	cfg.RootPath = Params.MinioCfg.RootPath
	cfg.IAMEndpoint = Params.MinioCfg.IAMEndpoint
	cfg.UseSSL = Params.MinioCfg.UseSSL
	cfg.UseIAM = Params.MinioCfg.UseIAM
	return cfg
}
// TestCgoIndex runs a build / serialize / load round trip for every case
// returned by genIndexCase.
//
// For each case it creates an index with NewCgoIndex, builds it from a dataset
// generated by genFieldData (nb rows, dim dimensions), serializes it, and
// loads the resulting blobs into a second index created with the same
// parameters.
//
// Every step is guarded by the result of its assertion: once a step fails, the
// failure is recorded and the remaining steps of that case are skipped. This
// keeps the test from calling methods on an index whose construction failed
// (which could panic and abort all remaining cases) and from reporting
// follow-up errors that only repeat the first one.
func TestCgoIndex(t *testing.T) {
	for _, testCase := range genIndexCase() {
		index, err := NewCgoIndex(testCase.dtype, testCase.typeParams, testCase.indexParams, genStorageConfig())
		if !assert.NoError(t, err, testCase) {
			continue
		}

		dataset := GenDataset(genFieldData(testCase.dtype, nb, dim))
		if !assert.NoError(t, index.Build(dataset), testCase) {
			continue
		}

		blobs, err := index.Serialize()
		if !assert.NoError(t, err, testCase) {
			continue
		}

		copyIndex, err := NewCgoIndex(testCase.dtype, testCase.typeParams, testCase.indexParams, genStorageConfig())
		if !assert.NoError(t, err, testCase) {
			continue
		}

		assert.NoError(t, copyIndex.Load(blobs), testCase)
	}
}