milvus/tests/go_client/testcases/helper/data_helper.go
ThreadDao a62118ca33
test: update cases for index describe and upsert with autoID (#35191)
issue: #33419

---------

Signed-off-by: ThreadDao <yufen.zong@zilliz.com>
2024-08-05 18:00:16 +08:00

443 lines
12 KiB
Go

package helper
import (
"bytes"
"encoding/json"
"strconv"
"go.uber.org/zap"
"github.com/milvus-io/milvus/client/v2/column"
"github.com/milvus-io/milvus/client/v2/entity"
"github.com/milvus-io/milvus/pkg/log"
"github.com/milvus-io/milvus/tests/go_client/common"
)
// InsertParams groups the settings of an insert step used by the test helpers.
// Create it with NewInsertParams and adjust it through the chainable TWith*
// setters.
type InsertParams struct {
	// Schema is the collection schema supplied to NewInsertParams.
	Schema *entity.Schema
	// PartitionName is set by TWithPartitionName and is empty by default.
	PartitionName string
	// IsRows is set by TWithIsRows and is false by default.
	// NOTE(review): presumably selects row-based instead of column-based
	// insert data — confirm against the callers.
	IsRows bool
}
// NewInsertParams returns insert parameters bound to schema. PartitionName and
// IsRows keep their zero values until set through the TWith* setters.
func NewInsertParams(schema *entity.Schema) *InsertParams {
	params := new(InsertParams)
	params.Schema = schema
	return params
}
// TWithPartitionName stores partitionName in the params and returns the same
// *InsertParams so that calls can be chained.
func (opt *InsertParams) TWithPartitionName(partitionName string) *InsertParams {
	opt.PartitionName = partitionName
	return opt
}
// TWithIsRows stores the isRows flag in the params and returns the same
// *InsertParams so that calls can be chained.
func (opt *InsertParams) TWithIsRows(isRows bool) *InsertParams {
	opt.IsRows = isRows
	return opt
}
// GenDataOption carries the settings consumed by the data generators in this
// file (GenColumnData, GenArrayColumnData, GenDefaultJSONData and
// GenColumnsBasedSchema). Build one with TNewDataOption and adjust it through
// the chainable TWith* setters.
type GenDataOption struct {
	// nb is the number of rows to generate; start is the index of the first
	// row, and generated scalar values are derived from the row index.
	nb, start int
	// dim is the dimension passed to the dense/binary vector generators.
	dim int
	// maxLen is stored by TWithMaxLen; none of the generators visible in this
	// file read it.
	maxLen int
	// sparseMaxLen is the argument passed to common.GenSparseVector.
	sparseMaxLen int
	// maxCapacity is the number of elements generated per array row.
	maxCapacity int
	// elementType is the element type used when generating array columns.
	elementType entity.FieldType
	// fieldName overrides the generated column name; when empty, the
	// generators derive a default name from the field/element type.
	fieldName string
}
// TWithNb sets the number of rows to generate and returns the same option for
// chaining.
func (opt *GenDataOption) TWithNb(nb int) *GenDataOption {
	opt.nb = nb
	return opt
}
// TWithDim sets the vector dimension used by the vector generators and returns
// the same option for chaining.
func (opt *GenDataOption) TWithDim(dim int) *GenDataOption {
	opt.dim = dim
	return opt
}
// TWithMaxLen stores maxLen on the option and returns the same option for
// chaining.
func (opt *GenDataOption) TWithMaxLen(maxLen int) *GenDataOption {
	opt.maxLen = maxLen
	return opt
}
// TWithSparseMaxLen sets the value handed to the sparse vector generator and
// returns the same option for chaining.
func (opt *GenDataOption) TWithSparseMaxLen(sparseMaxLen int) *GenDataOption {
	opt.sparseMaxLen = sparseMaxLen
	return opt
}
// TWithMaxCapacity sets how many elements each generated array row holds and
// returns the same option for chaining.
func (opt *GenDataOption) TWithMaxCapacity(maxCap int) *GenDataOption {
	opt.maxCapacity = maxCap
	return opt
}
// TWithStart sets the index of the first generated row and returns the same
// option for chaining.
func (opt *GenDataOption) TWithStart(start int) *GenDataOption {
	opt.start = start
	return opt
}
// TWithFieldName sets an explicit name for the generated column (overriding
// the type-derived default) and returns the same option for chaining.
func (opt *GenDataOption) TWithFieldName(fieldName string) *GenDataOption {
	opt.fieldName = fieldName
	return opt
}
// TWithElementType sets the element type used for array columns and returns
// the same option for chaining.
func (opt *GenDataOption) TWithElementType(eleType entity.FieldType) *GenDataOption {
	opt.elementType = eleType
	return opt
}
// TNewDataOption returns a GenDataOption populated with the defaults from the
// common package: DefaultNb rows starting at index 0, DefaultDim dimensions,
// TestMaxLen for both maxLen and sparseMaxLen, TestCapacity array elements,
// no element type and no explicit field name.
func TNewDataOption() *GenDataOption {
	opt := new(GenDataOption)
	opt.nb = common.DefaultNb
	opt.start = 0
	opt.dim = common.DefaultDim
	opt.maxLen = common.TestMaxLen
	opt.sparseMaxLen = common.TestMaxLen
	opt.maxCapacity = common.TestCapacity
	opt.elementType = entity.FieldTypeNone
	return opt
}
// GenArrayColumnData generates an array column of nb rows whose elements have
// type eleType.
//
// Row indices i run over [option.start, option.start+nb) and every row holds
// option.maxCapacity elements. For bool arrays each element of row i is
// "i is even"; for numeric arrays element j of row i is i+j converted to the
// element type; for varchar arrays it is the decimal string of i+j as bytes.
//
// The column is named option.fieldName, or the result of
// GetFieldNameByElementType(eleType) when that is empty. An unsupported
// eleType is reported through log.Fatal.
func GenArrayColumnData(nb int, eleType entity.FieldType, option GenDataOption) column.Column {
	name := option.fieldName
	if name == "" {
		name = GetFieldNameByElementType(eleType)
	}
	var (
		first = option.start
		last  = first + nb
		width = option.maxCapacity
	)

	switch eleType {
	case entity.FieldTypeBool:
		rows := make([][]bool, 0, nb)
		for i := first; i < last; i++ {
			row := make([]bool, width)
			even := i%2 == 0
			for j := range row {
				row[j] = even
			}
			rows = append(rows, row)
		}
		return column.NewColumnBoolArray(name, rows)

	case entity.FieldTypeInt8:
		rows := make([][]int8, 0, nb)
		for i := first; i < last; i++ {
			row := make([]int8, width)
			for j := range row {
				row[j] = int8(i + j)
			}
			rows = append(rows, row)
		}
		return column.NewColumnInt8Array(name, rows)

	case entity.FieldTypeInt16:
		rows := make([][]int16, 0, nb)
		for i := first; i < last; i++ {
			row := make([]int16, width)
			for j := range row {
				row[j] = int16(i + j)
			}
			rows = append(rows, row)
		}
		return column.NewColumnInt16Array(name, rows)

	case entity.FieldTypeInt32:
		rows := make([][]int32, 0, nb)
		for i := first; i < last; i++ {
			row := make([]int32, width)
			for j := range row {
				row[j] = int32(i + j)
			}
			rows = append(rows, row)
		}
		return column.NewColumnInt32Array(name, rows)

	case entity.FieldTypeInt64:
		rows := make([][]int64, 0, nb)
		for i := first; i < last; i++ {
			row := make([]int64, width)
			for j := range row {
				row[j] = int64(i + j)
			}
			rows = append(rows, row)
		}
		return column.NewColumnInt64Array(name, rows)

	case entity.FieldTypeFloat:
		rows := make([][]float32, 0, nb)
		for i := first; i < last; i++ {
			row := make([]float32, width)
			for j := range row {
				row[j] = float32(i + j)
			}
			rows = append(rows, row)
		}
		return column.NewColumnFloatArray(name, rows)

	case entity.FieldTypeDouble:
		rows := make([][]float64, 0, nb)
		for i := first; i < last; i++ {
			row := make([]float64, width)
			for j := range row {
				row[j] = float64(i + j)
			}
			rows = append(rows, row)
		}
		return column.NewColumnDoubleArray(name, rows)

	case entity.FieldTypeVarChar:
		rows := make([][][]byte, 0, nb)
		for i := first; i < last; i++ {
			row := make([][]byte, width)
			for j := range row {
				// Each element gets its own buffer so rows never share bytes.
				row[j] = bytes.NewBufferString(strconv.Itoa(i + j)).Bytes()
			}
			rows = append(rows, row)
		}
		return column.NewColumnVarCharArray(name, rows)

	default:
		log.Fatal("GenArrayColumnData failed", zap.Any("ElementType", eleType))
		return nil
	}
}
// JSONStruct is the object shape that GenDefaultJSONData marshals for part of
// its rows. Number, String and List carry `omitempty`, so zero-valued fields
// are left out of the encoded JSON. Field order is significant: it determines
// the key order of the marshaled output.
type JSONStruct struct {
	// Number is the row index as int32; only populated for odd rows.
	Number int32 `json:"number,omitempty" milvus:"name:number"`
	// String is the decimal representation of the row index.
	String string `json:"string,omitempty" milvus:"name:string"`
	// BoolStruct is embedded by pointer; it is declared elsewhere in the package.
	*BoolStruct
	// List holds {i, i+1} for odd rows and is omitted otherwise.
	List []int64 `json:"list,omitempty" milvus:"name:list"`
}
// GenDefaultJSONData returns nb JSON-encoded values, one per row index i in
// [option.start, option.start+nb).
//
// Rows with i < (start+nb)/2 are JSONStruct objects: even rows carry only the
// string and bool members, odd rows additionally carry number and list. The
// remaining rows are bare JSON values chosen by i%4: an int, a float32, a
// string or a two-element int64 list.
//
// NOTE(review): the split point is (start+nb)/2, not start+nb/2, so with a
// non-zero start fewer than half of the rows are objects — confirm this is
// intended.
//
// A marshaling failure is reported through log.Fatal.
func GenDefaultJSONData(nb int, option GenDataOption) [][]byte {
	var (
		first   = option.start
		last    = first + nb
		encoded = make([][]byte, 0, nb)
		// value lives outside the loop, exactly as before, so a row that
		// matches no branch below would reuse the previous row's value.
		value interface{}
	)

	for row := first; row < last; row++ {
		if row < last/2 {
			// kv value
			doc := JSONStruct{
				String:     strconv.Itoa(row),
				BoolStruct: &BoolStruct{Bool: row%2 == 0},
			}
			if row%2 != 0 {
				doc.Number = int32(row)
				doc.List = []int64{int64(row), int64(row + 1)}
			}
			value = doc
		} else {
			// int, float, string, list
			switch row % 4 {
			case 0:
				value = row
			case 1:
				value = float32(row)
			case 2:
				value = strconv.Itoa(row)
			case 3:
				value = []int64{int64(row), int64(row + 1)}
			}
		}

		raw, err := json.Marshal(&value)
		if err != nil {
			log.Fatal("Marshal json field failed", zap.Error(err))
		}
		encoded = append(encoded, raw)
	}
	return encoded
}
// GenColumnData generates one column of nb rows for a field of type fieldType.
// It covers every supported field type except the dynamic column, which is
// produced by GenDynamicColumnData.
//
// Row indices i run over [option.start, option.start+nb):
//   - integer, float and double columns hold i converted to the column type
//     (narrow integer types wrap on overflow, as Go conversions do);
//   - bool columns hold "i is even", matching GenArrayColumnData and
//     GenDefaultJSONData;
//   - varchar columns hold the decimal string of i;
//   - array and JSON columns delegate to GenArrayColumnData (with
//     option.elementType) and GenDefaultJSONData;
//   - vector columns call the matching common.Gen*Vector helper once per row
//     with option.dim (option.sparseMaxLen for sparse vectors).
//
// The column is named option.fieldName or, when that is empty, the name
// returned by GetFieldNameByFieldType. Array columns are named by
// GenArrayColumnData itself. An unsupported fieldType is reported through
// log.Fatal.
func GenColumnData(nb int, fieldType entity.FieldType, option GenDataOption) column.Column {
	dim := option.dim
	sparseMaxLen := option.sparseMaxLen
	start := option.start
	end := start + nb

	fieldName := option.fieldName
	if fieldName == "" {
		fieldName = GetFieldNameByFieldType(fieldType, TWithElementType(option.elementType))
	}

	switch fieldType {
	case entity.FieldTypeInt64:
		int64Values := make([]int64, 0, nb)
		for i := start; i < end; i++ {
			int64Values = append(int64Values, int64(i))
		}
		return column.NewColumnInt64(fieldName, int64Values)

	case entity.FieldTypeInt8:
		int8Values := make([]int8, 0, nb)
		for i := start; i < end; i++ {
			int8Values = append(int8Values, int8(i))
		}
		return column.NewColumnInt8(fieldName, int8Values)

	case entity.FieldTypeInt16:
		int16Values := make([]int16, 0, nb)
		for i := start; i < end; i++ {
			int16Values = append(int16Values, int16(i))
		}
		return column.NewColumnInt16(fieldName, int16Values)

	case entity.FieldTypeInt32:
		int32Values := make([]int32, 0, nb)
		for i := start; i < end; i++ {
			int32Values = append(int32Values, int32(i))
		}
		return column.NewColumnInt32(fieldName, int32Values)

	case entity.FieldTypeBool:
		boolValues := make([]bool, 0, nb)
		for i := start; i < end; i++ {
			// Alternate by row parity. Integer division (i/2 == 0) would be
			// true only for rows 0 and 1 and leave the column almost constant.
			boolValues = append(boolValues, i%2 == 0)
		}
		return column.NewColumnBool(fieldName, boolValues)

	case entity.FieldTypeFloat:
		floatValues := make([]float32, 0, nb)
		for i := start; i < end; i++ {
			floatValues = append(floatValues, float32(i))
		}
		return column.NewColumnFloat(fieldName, floatValues)

	case entity.FieldTypeDouble:
		doubleValues := make([]float64, 0, nb)
		for i := start; i < end; i++ {
			doubleValues = append(doubleValues, float64(i))
		}
		return column.NewColumnDouble(fieldName, doubleValues)

	case entity.FieldTypeVarChar:
		varcharValues := make([]string, 0, nb)
		for i := start; i < end; i++ {
			varcharValues = append(varcharValues, strconv.Itoa(i))
		}
		return column.NewColumnVarChar(fieldName, varcharValues)

	case entity.FieldTypeArray:
		return GenArrayColumnData(nb, option.elementType, option)

	case entity.FieldTypeJSON:
		jsonValues := GenDefaultJSONData(nb, option)
		return column.NewColumnJSONBytes(fieldName, jsonValues)

	// The vector generators do not take the row index, so the loops below
	// simply count nb rows.
	case entity.FieldTypeFloatVector:
		vecFloatValues := make([][]float32, 0, nb)
		for i := 0; i < nb; i++ {
			vecFloatValues = append(vecFloatValues, common.GenFloatVector(dim))
		}
		return column.NewColumnFloatVector(fieldName, dim, vecFloatValues)

	case entity.FieldTypeBinaryVector:
		binaryVectors := make([][]byte, 0, nb)
		for i := 0; i < nb; i++ {
			binaryVectors = append(binaryVectors, common.GenBinaryVector(dim))
		}
		return column.NewColumnBinaryVector(fieldName, dim, binaryVectors)

	case entity.FieldTypeFloat16Vector:
		fp16Vectors := make([][]byte, 0, nb)
		for i := 0; i < nb; i++ {
			fp16Vectors = append(fp16Vectors, common.GenFloat16Vector(dim))
		}
		return column.NewColumnFloat16Vector(fieldName, dim, fp16Vectors)

	case entity.FieldTypeBFloat16Vector:
		bf16Vectors := make([][]byte, 0, nb)
		for i := 0; i < nb; i++ {
			bf16Vectors = append(bf16Vectors, common.GenBFloat16Vector(dim))
		}
		return column.NewColumnBFloat16Vector(fieldName, dim, bf16Vectors)

	case entity.FieldTypeSparseVector:
		vectors := make([]entity.SparseEmbedding, 0, nb)
		for i := 0; i < nb; i++ {
			vectors = append(vectors, common.GenSparseVector(sparseMaxLen))
		}
		return column.NewColumnSparseVectors(fieldName, vectors)

	default:
		log.Fatal("GenColumnData failed", zap.Any("FieldType", fieldType))
		return nil
	}
}
// GenDynamicColumnData generates the four dynamic-field columns for row
// indices i in [start, start+nb): an int32 column holding i, a string column
// holding the decimal string of i, a bool column holding i%3 == 0, and a JSON
// column whose value is {"list": {"list": [i, i+1]}}. The columns are named
// after the common.DefaultDynamic*Field constants and returned in that order.
// A marshaling failure is reported through log.Fatal.
func GenDynamicColumnData(start int, nb int) []column.Column {
	type ListStruct struct {
		List []int64 `json:"list" milvus:"name:list"`
	}

	// gen number, string bool list data column
	var (
		numbers = make([]int32, 0, nb)
		texts   = make([]string, 0, nb)
		flags   = make([]bool, 0, nb)
		lists   = make([][]byte, 0, nb)
	)
	for i, end := start, start+nb; i < end; i++ {
		raw, err := json.Marshal(map[string]interface{}{
			"list": ListStruct{List: []int64{int64(i), int64(i + 1)}},
		})
		if err != nil {
			log.Fatal("Marshal json field failed:", zap.Error(err))
		}

		numbers = append(numbers, int32(i))
		texts = append(texts, strconv.Itoa(i))
		flags = append(flags, i%3 == 0)
		lists = append(lists, raw)
	}

	return []column.Column{
		column.NewColumnInt32(common.DefaultDynamicNumberField, numbers),
		column.NewColumnString(common.DefaultDynamicStringField, texts),
		column.NewColumnBool(common.DefaultDynamicBoolField, flags),
		column.NewColumnJSONBytes(common.DefaultDynamicListField, lists),
	}
}
// MergeColumnsToDynamic folds the given columns into a single JSON column
// named columnName with its dynamic flag set. Row i of the result is the JSON
// object mapping each column's name to that column's i-th value, for i in
// [0, nb). A marshaling failure is reported through log.Fatal.
//
// Callers must make sure every column has at least nb rows: the error
// returned by Get is intentionally ignored.
func MergeColumnsToDynamic(nb int, columns []column.Column, columnName string) *column.ColumnJSONBytes {
	rows := make([][]byte, nb)
	for i := range rows {
		row := make(map[string]interface{})
		for _, col := range columns {
			// range guaranteed
			cell, _ := col.Get(i)
			row[col.Name()] = cell
		}

		raw, err := json.Marshal(&row)
		if err != nil {
			log.Fatal("MergeColumnsToDynamic failed:", zap.Error(err))
		}
		rows[i] = raw
	}
	return column.NewColumnJSONBytes(columnName, rows).WithIsDynamic(true)
}
// GenColumnsBasedSchema generates data for every field of schema.
//
// The first result holds one column per schema field, in schema order,
// skipping fields marked AutoID; each is produced by GenColumnData with
// option.nb rows. The second result holds the columns from
// GenDynamicColumnData when schema.EnableDynamicField is set and is nil
// otherwise.
//
// A nil schema or one with an empty CollectionName is reported through
// log.Fatal. Note that option is modified: whenever an array field is visited
// its ElementType is written into option and stays there for later fields and
// for the caller.
func GenColumnsBasedSchema(schema *entity.Schema, option *GenDataOption) ([]column.Column, []column.Column) {
	if schema == nil || schema.CollectionName == "" {
		log.Fatal("[GenColumnsBasedSchema] Nil Schema is not expected")
	}

	columns := make([]column.Column, 0, len(schema.Fields)+1)
	for _, f := range schema.Fields {
		// Record the element type before the AutoID check, as the original
		// ordering did.
		if f.DataType == entity.FieldTypeArray {
			option.TWithElementType(f.ElementType)
		}
		if !f.AutoID {
			columns = append(columns, GenColumnData(option.nb, f.DataType, *option))
		}
	}

	if !schema.EnableDynamicField {
		return columns, nil
	}
	return columns, GenDynamicColumnData(option.start, option.nb)
}