2023-03-24 17:09:58 +08:00
package proxy
import (
2024-03-12 12:45:03 +08:00
"encoding/json"
2023-03-24 17:09:58 +08:00
"fmt"
2023-05-16 20:19:22 +08:00
"math"
2023-09-28 19:01:27 +08:00
"reflect"
2023-03-24 17:09:58 +08:00
2023-09-21 09:45:27 +08:00
"go.uber.org/zap"
2023-06-09 01:28:37 +08:00
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
2024-08-21 00:22:54 +08:00
"github.com/milvus-io/milvus/pkg/common"
2023-05-15 16:15:21 +08:00
"github.com/milvus-io/milvus/pkg/log"
2023-04-06 19:14:32 +08:00
"github.com/milvus-io/milvus/pkg/util/funcutil"
"github.com/milvus-io/milvus/pkg/util/merr"
2023-12-21 11:40:42 +08:00
"github.com/milvus-io/milvus/pkg/util/parameterutil"
2023-05-12 11:33:20 +08:00
"github.com/milvus-io/milvus/pkg/util/paramtable"
2023-04-06 19:14:32 +08:00
"github.com/milvus-io/milvus/pkg/util/typeutil"
2023-03-24 17:09:58 +08:00
)
// validateUtil carries the switches for the optional checks performed while
// validating field data. Every switch is off in the zero value; the
// with*Check options turn them on one at a time.
type validateUtil struct {
	// checkNAN gates the typeutil.VerifyFloats* calls on float and float-vector data.
	checkNAN bool
	// checkMaxLen gates the length limits on varchar, JSON and string-array values.
	checkMaxLen bool
	// checkOverflow gates the int8/int16 range verification of integer data.
	checkOverflow bool
	// checkMaxCap gates the per-row capacity verification of array fields.
	checkMaxCap bool
}
// validateOption mutates a validateUtil; the with*Check constructors each
// return an option that enables one check flag, and apply runs them.
type validateOption func ( * validateUtil )
// withNANCheck returns an option that sets checkNAN on the validateUtil it is
// applied to.
func withNANCheck() validateOption {
	enableNAN := func(u *validateUtil) {
		u.checkNAN = true
	}
	return enableNAN
}
// withMaxLenCheck returns an option that sets checkMaxLen on the validateUtil
// it is applied to.
func withMaxLenCheck() validateOption {
	enableMaxLen := func(u *validateUtil) {
		u.checkMaxLen = true
	}
	return enableMaxLen
}
2023-05-16 20:19:22 +08:00
// withOverflowCheck returns an option that sets checkOverflow on the
// validateUtil it is applied to.
func withOverflowCheck() validateOption {
	enableOverflow := func(u *validateUtil) {
		u.checkOverflow = true
	}
	return enableOverflow
}
2023-09-19 14:23:23 +08:00
// withMaxCapCheck returns an option that sets checkMaxCap on the validateUtil
// it is applied to.
func withMaxCapCheck() validateOption {
	enableMaxCap := func(u *validateUtil) {
		u.checkMaxCap = true
	}
	return enableMaxCap
}
2023-03-24 17:09:58 +08:00
// apply runs every option in opts against v, in the order given.
func (v *validateUtil) apply(opts ...validateOption) {
	for i := 0; i < len(opts); i++ {
		opts[i](v)
	}
}
2024-08-15 19:18:53 +08:00
func ( v * validateUtil ) Validate ( data [ ] * schemapb . FieldData , helper * typeutil . SchemaHelper , numRows uint64 ) error {
if helper == nil {
return merr . WrapErrServiceInternal ( "nil schema helper provided for Validation" )
2023-03-24 17:09:58 +08:00
}
for _ , field := range data {
fieldSchema , err := helper . GetFieldFromName ( field . GetFieldName ( ) )
if err != nil {
return err
}
switch fieldSchema . GetDataType ( ) {
case schemapb . DataType_FloatVector :
if err := v . checkFloatVectorFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2023-09-08 10:03:16 +08:00
case schemapb . DataType_Float16Vector :
if err := v . checkFloat16VectorFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2024-01-11 15:48:51 +08:00
case schemapb . DataType_BFloat16Vector :
if err := v . checkBFloat16VectorFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2023-03-24 17:09:58 +08:00
case schemapb . DataType_BinaryVector :
if err := v . checkBinaryVectorFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2024-03-14 05:32:54 +08:00
case schemapb . DataType_SparseFloatVector :
if err := v . checkSparseFloatFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2023-03-24 17:09:58 +08:00
case schemapb . DataType_VarChar :
if err := v . checkVarCharFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2023-05-12 11:33:20 +08:00
case schemapb . DataType_JSON :
if err := v . checkJSONFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2024-05-06 21:15:29 +08:00
case schemapb . DataType_Int8 , schemapb . DataType_Int16 , schemapb . DataType_Int32 :
2023-05-16 20:19:22 +08:00
if err := v . checkIntegerFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2024-05-06 21:15:29 +08:00
case schemapb . DataType_Int64 :
if err := v . checkLongFieldData ( field , fieldSchema ) ; err != nil {
return err
}
case schemapb . DataType_Float :
if err := v . checkFloatFieldData ( field , fieldSchema ) ; err != nil {
return err
}
case schemapb . DataType_Double :
if err := v . checkDoubleFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2023-09-19 14:23:23 +08:00
case schemapb . DataType_Array :
if err := v . checkArrayFieldData ( field , fieldSchema ) ; err != nil {
return err
}
2023-03-24 17:09:58 +08:00
default :
}
}
2024-08-15 19:18:53 +08:00
err := v . fillWithValue ( data , helper , int ( numRows ) )
2023-06-15 10:24:38 +08:00
if err != nil {
return err
}
if err := v . checkAligned ( data , helper , numRows ) ; err != nil {
return err
}
2023-03-24 17:09:58 +08:00
return nil
}
func ( v * validateUtil ) checkAligned ( data [ ] * schemapb . FieldData , schema * typeutil . SchemaHelper , numRows uint64 ) error {
2024-02-21 10:16:53 +08:00
errNumRowsMismatch := func ( fieldName string , fieldNumRows uint64 ) error {
msg := fmt . Sprintf ( "the num_rows (%d) of field (%s) is not equal to passed num_rows (%d)" , fieldNumRows , fieldName , numRows )
2024-08-07 09:56:28 +08:00
return merr . WrapErrParameterInvalid ( numRows , fieldNumRows , msg )
2023-03-24 17:09:58 +08:00
}
2024-03-25 19:19:07 +08:00
errDimMismatch := func ( fieldName string , dataDim int64 , schemaDim int64 ) error {
msg := fmt . Sprintf ( "the dim (%d) of field data(%s) is not equal to schema dim (%d)" , dataDim , fieldName , schemaDim )
2024-06-18 21:39:59 +08:00
return merr . WrapErrParameterInvalid ( schemaDim , dataDim , msg )
2024-03-25 19:19:07 +08:00
}
2023-03-24 17:09:58 +08:00
for _ , field := range data {
switch field . GetType ( ) {
case schemapb . DataType_FloatVector :
f , err := schema . GetFieldFromName ( field . GetFieldName ( ) )
if err != nil {
return err
}
dim , err := typeutil . GetDim ( f )
if err != nil {
return err
}
n , err := funcutil . GetNumRowsOfFloatVectorField ( field . GetVectors ( ) . GetFloatVector ( ) . GetData ( ) , dim )
if err != nil {
return err
}
2024-03-25 19:19:07 +08:00
dataDim := field . GetVectors ( ) . Dim
if dataDim != dim {
return errDimMismatch ( field . GetFieldName ( ) , dataDim , dim )
}
2023-03-24 17:09:58 +08:00
if n != numRows {
2024-02-21 10:16:53 +08:00
return errNumRowsMismatch ( field . GetFieldName ( ) , n )
2023-03-24 17:09:58 +08:00
}
case schemapb . DataType_BinaryVector :
f , err := schema . GetFieldFromName ( field . GetFieldName ( ) )
if err != nil {
return err
}
dim , err := typeutil . GetDim ( f )
if err != nil {
return err
}
2024-03-25 19:19:07 +08:00
dataDim := field . GetVectors ( ) . Dim
if dataDim != dim {
return errDimMismatch ( field . GetFieldName ( ) , dataDim , dim )
}
2023-03-24 17:09:58 +08:00
n , err := funcutil . GetNumRowsOfBinaryVectorField ( field . GetVectors ( ) . GetBinaryVector ( ) , dim )
if err != nil {
return err
}
if n != numRows {
2024-02-21 10:16:53 +08:00
return errNumRowsMismatch ( field . GetFieldName ( ) , n )
2023-03-24 17:09:58 +08:00
}
2023-09-08 10:03:16 +08:00
case schemapb . DataType_Float16Vector :
f , err := schema . GetFieldFromName ( field . GetFieldName ( ) )
if err != nil {
return err
}
dim , err := typeutil . GetDim ( f )
if err != nil {
return err
}
2024-03-25 19:19:07 +08:00
dataDim := field . GetVectors ( ) . Dim
if dataDim != dim {
return errDimMismatch ( field . GetFieldName ( ) , dataDim , dim )
}
2023-09-08 10:03:16 +08:00
n , err := funcutil . GetNumRowsOfFloat16VectorField ( field . GetVectors ( ) . GetFloat16Vector ( ) , dim )
if err != nil {
return err
}
if n != numRows {
2024-02-21 10:16:53 +08:00
return errNumRowsMismatch ( field . GetFieldName ( ) , n )
2023-09-08 10:03:16 +08:00
}
2024-01-11 15:48:51 +08:00
case schemapb . DataType_BFloat16Vector :
f , err := schema . GetFieldFromName ( field . GetFieldName ( ) )
if err != nil {
return err
}
dim , err := typeutil . GetDim ( f )
if err != nil {
return err
}
2024-03-25 19:19:07 +08:00
dataDim := field . GetVectors ( ) . Dim
if dataDim != dim {
return errDimMismatch ( field . GetFieldName ( ) , dataDim , dim )
}
2024-01-11 15:48:51 +08:00
n , err := funcutil . GetNumRowsOfBFloat16VectorField ( field . GetVectors ( ) . GetBfloat16Vector ( ) , dim )
if err != nil {
return err
}
if n != numRows {
2024-02-21 10:16:53 +08:00
return errNumRowsMismatch ( field . GetFieldName ( ) , n )
2024-01-11 15:48:51 +08:00
}
2024-03-14 05:32:54 +08:00
case schemapb . DataType_SparseFloatVector :
n := uint64 ( len ( field . GetVectors ( ) . GetSparseFloatVector ( ) . Contents ) )
if n != numRows {
return errNumRowsMismatch ( field . GetFieldName ( ) , n )
}
2023-03-24 17:09:58 +08:00
default :
// error won't happen here.
2024-05-07 10:07:29 +08:00
n , err := funcutil . GetNumRowOfFieldDataWithSchema ( field , schema )
2023-03-24 17:09:58 +08:00
if err != nil {
return err
}
if n != numRows {
2024-05-16 11:57:35 +08:00
log . Warn ( "the num_rows of field is not equal to passed num_rows" , zap . String ( "fieldName" , field . GetFieldName ( ) ) ,
zap . Int64 ( "fieldNumRows" , int64 ( n ) ) , zap . Int64 ( "passedNumRows" , int64 ( numRows ) ) ,
zap . Bools ( "ValidData" , field . GetValidData ( ) ) )
2024-02-21 10:16:53 +08:00
return errNumRowsMismatch ( field . GetFieldName ( ) , n )
2023-03-24 17:09:58 +08:00
}
}
}
return nil
}
2024-05-16 11:57:35 +08:00
// fill data in two situation
// 1. has no default_value, if nullable,
// will fill nullValue when passed num_rows not equal to expected num_rows
// 2. has default_value,
// will fill default_value when passed num_rows not equal to expected num_rows,
//
// after fillWithValue, only nullable field will has valid_data, the length of all data will be passed num_rows
func ( v * validateUtil ) fillWithValue ( data [ ] * schemapb . FieldData , schema * typeutil . SchemaHelper , numRows int ) error {
2023-05-15 16:15:21 +08:00
for _ , field := range data {
fieldSchema , err := schema . GetFieldFromName ( field . GetFieldName ( ) )
if err != nil {
return err
}
if fieldSchema . GetDefaultValue ( ) == nil {
2024-05-16 11:57:35 +08:00
err = v . fillWithNullValue ( field , fieldSchema , numRows )
if err != nil {
return err
}
} else {
err = v . fillWithDefaultValue ( field , fieldSchema , numRows )
if err != nil {
return err
}
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
return nil
}
func ( v * validateUtil ) fillWithNullValue ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema , numRows int ) error {
err := checkValidData ( field , fieldSchema , numRows )
if err != nil {
return err
}
switch field . Field . ( type ) {
case * schemapb . FieldData_Scalars :
switch sd := field . GetScalars ( ) . GetData ( ) . ( type ) {
case * schemapb . ScalarField_BoolData :
if fieldSchema . GetNullable ( ) {
sd . BoolData . Data , err = fillWithNullValueImpl ( sd . BoolData . Data , field . GetValidData ( ) )
if err != nil {
return err
}
}
case * schemapb . ScalarField_IntData :
if fieldSchema . GetNullable ( ) {
sd . IntData . Data , err = fillWithNullValueImpl ( sd . IntData . Data , field . GetValidData ( ) )
if err != nil {
return err
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_LongData :
if fieldSchema . GetNullable ( ) {
sd . LongData . Data , err = fillWithNullValueImpl ( sd . LongData . Data , field . GetValidData ( ) )
if err != nil {
return err
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_FloatData :
if fieldSchema . GetNullable ( ) {
sd . FloatData . Data , err = fillWithNullValueImpl ( sd . FloatData . Data , field . GetValidData ( ) )
if err != nil {
return err
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_DoubleData :
if fieldSchema . GetNullable ( ) {
sd . DoubleData . Data , err = fillWithNullValueImpl ( sd . DoubleData . Data , field . GetValidData ( ) )
if err != nil {
return err
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_StringData :
if fieldSchema . GetNullable ( ) {
sd . StringData . Data , err = fillWithNullValueImpl ( sd . StringData . Data , field . GetValidData ( ) )
if err != nil {
return err
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_ArrayData :
2024-09-19 17:35:11 +08:00
if fieldSchema . GetNullable ( ) {
sd . ArrayData . Data , err = fillWithNullValueImpl ( sd . ArrayData . Data , field . GetValidData ( ) )
if err != nil {
return err
}
}
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_JsonData :
if fieldSchema . GetNullable ( ) {
sd . JsonData . Data , err = fillWithNullValueImpl ( sd . JsonData . Data , field . GetValidData ( ) )
if err != nil {
return err
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
}
default :
return merr . WrapErrParameterInvalidMsg ( fmt . Sprintf ( "undefined data type:%s" , field . Type . String ( ) ) )
}
case * schemapb . FieldData_Vectors :
default :
return merr . WrapErrParameterInvalidMsg ( fmt . Sprintf ( "undefined data type:%s" , field . Type . String ( ) ) )
}
return nil
}
func ( v * validateUtil ) fillWithDefaultValue ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema , numRows int ) error {
var err error
switch field . Field . ( type ) {
case * schemapb . FieldData_Scalars :
switch sd := field . GetScalars ( ) . GetData ( ) . ( type ) {
case * schemapb . ScalarField_BoolData :
if len ( field . GetValidData ( ) ) != numRows {
msg := fmt . Sprintf ( "the length of valid_data of field(%s) is wrong" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( numRows , len ( field . GetValidData ( ) ) , msg )
}
defaultValue := fieldSchema . GetDefaultValue ( ) . GetBoolData ( )
sd . BoolData . Data , err = fillWithDefaultValueImpl ( sd . BoolData . Data , defaultValue , field . GetValidData ( ) )
if err != nil {
return err
}
case * schemapb . ScalarField_IntData :
if len ( field . GetValidData ( ) ) != numRows {
msg := fmt . Sprintf ( "the length of valid_data of field(%s) is wrong" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( numRows , len ( field . GetValidData ( ) ) , msg )
}
defaultValue := fieldSchema . GetDefaultValue ( ) . GetIntData ( )
sd . IntData . Data , err = fillWithDefaultValueImpl ( sd . IntData . Data , defaultValue , field . GetValidData ( ) )
if err != nil {
return err
}
case * schemapb . ScalarField_LongData :
if len ( field . GetValidData ( ) ) != numRows {
msg := fmt . Sprintf ( "the length of valid_data of field(%s) is wrong" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( numRows , len ( field . GetValidData ( ) ) , msg )
}
defaultValue := fieldSchema . GetDefaultValue ( ) . GetLongData ( )
sd . LongData . Data , err = fillWithDefaultValueImpl ( sd . LongData . Data , defaultValue , field . GetValidData ( ) )
if err != nil {
return err
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_FloatData :
if len ( field . GetValidData ( ) ) != numRows {
msg := fmt . Sprintf ( "the length of valid_data of field(%s) is wrong" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( numRows , len ( field . GetValidData ( ) ) , msg )
}
defaultValue := fieldSchema . GetDefaultValue ( ) . GetFloatData ( )
sd . FloatData . Data , err = fillWithDefaultValueImpl ( sd . FloatData . Data , defaultValue , field . GetValidData ( ) )
if err != nil {
return err
}
2023-05-15 16:15:21 +08:00
2024-05-16 11:57:35 +08:00
case * schemapb . ScalarField_DoubleData :
if len ( field . GetValidData ( ) ) != numRows {
msg := fmt . Sprintf ( "the length of valid_data of field(%s) is wrong" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( numRows , len ( field . GetValidData ( ) ) , msg )
}
defaultValue := fieldSchema . GetDefaultValue ( ) . GetDoubleData ( )
sd . DoubleData . Data , err = fillWithDefaultValueImpl ( sd . DoubleData . Data , defaultValue , field . GetValidData ( ) )
if err != nil {
return err
}
case * schemapb . ScalarField_StringData :
if len ( field . GetValidData ( ) ) != numRows {
msg := fmt . Sprintf ( "the length of valid_data of field(%s) is wrong" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( numRows , len ( field . GetValidData ( ) ) , msg )
}
defaultValue := fieldSchema . GetDefaultValue ( ) . GetStringData ( )
sd . StringData . Data , err = fillWithDefaultValueImpl ( sd . StringData . Data , defaultValue , field . GetValidData ( ) )
if err != nil {
return err
}
case * schemapb . ScalarField_ArrayData :
// Todo: support it
log . Error ( "array type not support default value" , zap . String ( "fieldSchemaName" , field . GetFieldName ( ) ) )
return merr . WrapErrParameterInvalid ( "not set default value" , "" , "array type not support default value" )
case * schemapb . ScalarField_JsonData :
if len ( field . GetValidData ( ) ) != numRows {
msg := fmt . Sprintf ( "the length of valid_data of field(%s) is wrong" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( numRows , len ( field . GetValidData ( ) ) , msg )
}
defaultValue := fieldSchema . GetDefaultValue ( ) . GetBytesData ( )
sd . JsonData . Data , err = fillWithDefaultValueImpl ( sd . JsonData . Data , defaultValue , field . GetValidData ( ) )
if err != nil {
return err
}
2023-05-15 16:15:21 +08:00
default :
2024-05-16 11:57:35 +08:00
return merr . WrapErrParameterInvalidMsg ( fmt . Sprintf ( "undefined data type:%s" , field . Type . String ( ) ) )
2023-05-15 16:15:21 +08:00
}
2024-05-16 11:57:35 +08:00
case * schemapb . FieldData_Vectors :
log . Error ( "vector not support default value" , zap . String ( "fieldSchemaName" , field . GetFieldName ( ) ) )
return merr . WrapErrParameterInvalidMsg ( "vector type not support default value" )
default :
return merr . WrapErrParameterInvalidMsg ( fmt . Sprintf ( "undefined data type:%s" , field . Type . String ( ) ) )
}
2024-09-19 17:35:11 +08:00
if ! typeutil . IsVectorType ( field . Type ) {
if fieldSchema . GetNullable ( ) {
validData := make ( [ ] bool , numRows )
for i := range validData {
validData [ i ] = true
}
field . ValidData = validData
} else {
field . ValidData = [ ] bool { }
}
}
2024-05-16 11:57:35 +08:00
err = checkValidData ( field , fieldSchema , numRows )
if err != nil {
return err
2023-05-15 16:15:21 +08:00
}
return nil
}
2024-05-16 11:57:35 +08:00
// checkValidData verifies the length of data's valid_data: numRows entries
// when the schema marks the field nullable, zero entries otherwise. A
// mismatch is returned as an invalid-parameter error.
func checkValidData(data *schemapb.FieldData, schema *schemapb.FieldSchema, numRows int) error {
	want := 0
	if schema.GetNullable() {
		want = numRows
	}

	got := len(data.GetValidData())
	if got == want {
		return nil
	}

	msg := fmt.Sprintf("the length of valid_data of field(%s) is wrong", data.GetFieldName())
	return merr.WrapErrParameterInvalid(want, got, msg)
}
// fillWithNullValueImpl expands array, which must hold exactly one element
// per true entry of validData, to len(validData) elements. Valid rows receive
// the elements of array in order; invalid rows keep the zero value of T.
// When every row is valid, array is returned unchanged. A length mismatch
// between array and the number of valid rows is an error.
func fillWithNullValueImpl[T any](array []T, validData []bool) ([]T, error) {
	validCount := getValidNumber(validData)
	if validCount != len(array) {
		return nil, merr.WrapErrParameterInvalid(validCount, len(array), "the length of field is wrong")
	}

	total := len(validData)
	if validCount == total {
		return array, nil
	}

	filled := make([]T, total)
	next := 0
	for row := 0; row < total; row++ {
		if !validData[row] {
			continue
		}
		filled[row] = array[next]
		next++
	}
	return filled, nil
}
// fillWithDefaultValueImpl expands array, which must hold exactly one element
// per true entry of validData, to len(validData) elements. Valid rows receive
// the elements of array in order; invalid rows receive value. When every row
// is valid, array is returned unchanged. A length mismatch between array and
// the number of valid rows is an error.
func fillWithDefaultValueImpl[T any](array []T, value T, validData []bool) ([]T, error) {
	validCount := getValidNumber(validData)
	if validCount != len(array) {
		return nil, merr.WrapErrParameterInvalid(validCount, len(array), "the length of field is wrong")
	}

	total := len(validData)
	if validCount == total {
		return array, nil
	}

	filled := make([]T, total)
	next := 0
	for row := 0; row < total; row++ {
		if !validData[row] {
			filled[row] = value
			continue
		}
		filled[row] = array[next]
		next++
	}
	return filled, nil
}
// getValidNumber returns how many entries of validData are true.
func getValidNumber(validData []bool) int {
	count := 0
	for i := 0; i < len(validData); i++ {
		if validData[i] {
			count++
		}
	}
	return count
}
2023-03-24 17:09:58 +08:00
// checkFloatVectorFieldData rejects a field that carries no float-vector data
// and, when checkNAN is enabled, passes the values to typeutil.VerifyFloats32.
func (v *validateUtil) checkFloatVectorFieldData(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema) error {
	values := field.GetVectors().GetFloatVector().GetData()

	switch {
	case values == nil:
		msg := fmt.Sprintf("float vector field '%v' is illegal, array type mismatch", field.GetFieldName())
		return merr.WrapErrParameterInvalid("need float vector", "got nil", msg)
	case v.checkNAN:
		return typeutil.VerifyFloats32(values)
	default:
		return nil
	}
}
2023-09-08 10:03:16 +08:00
func ( v * validateUtil ) checkFloat16VectorFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
2024-04-09 14:19:27 +08:00
float16VecArray := field . GetVectors ( ) . GetFloat16Vector ( )
if float16VecArray == nil {
msg := fmt . Sprintf ( "float16 float field '%v' is illegal, nil Vector_Float16 type" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need vector_float16 array" , "got nil" , msg )
}
if v . checkNAN {
return typeutil . VerifyFloats16 ( float16VecArray )
}
2023-09-08 10:03:16 +08:00
return nil
}
2024-01-11 15:48:51 +08:00
func ( v * validateUtil ) checkBFloat16VectorFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
2024-04-09 14:19:27 +08:00
bfloat16VecArray := field . GetVectors ( ) . GetBfloat16Vector ( )
if bfloat16VecArray == nil {
msg := fmt . Sprintf ( "bfloat16 float field '%v' is illegal, nil Vector_BFloat16 type" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need vector_bfloat16 array" , "got nil" , msg )
}
if v . checkNAN {
return typeutil . VerifyBFloats16 ( bfloat16VecArray )
}
2024-01-11 15:48:51 +08:00
return nil
}
2023-03-24 17:09:58 +08:00
func ( v * validateUtil ) checkBinaryVectorFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
2024-05-06 21:15:29 +08:00
bVecArray := field . GetVectors ( ) . GetBinaryVector ( )
if bVecArray == nil {
msg := fmt . Sprintf ( "binary float vector field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need bytes array" , "got nil" , msg )
}
2023-03-24 17:09:58 +08:00
return nil
}
2024-03-14 05:32:54 +08:00
// checkSparseFloatFieldData rejects a field without a sparse float vector
// payload or without row contents, then validates the rows with
// typeutil.ValidateSparseFloatRows.
func (v *validateUtil) checkSparseFloatFieldData(field *schemapb.FieldData, fieldSchema *schemapb.FieldSchema) error {
	vectors := field.GetVectors()
	if vectors == nil || vectors.GetSparseFloatVector() == nil {
		msg := fmt.Sprintf("sparse float field '%v' is illegal, nil SparseFloatVector", field.GetFieldName())
		return merr.WrapErrParameterInvalid("need sparse float array", "got nil", msg)
	}

	rows := vectors.GetSparseFloatVector().GetContents()
	if rows != nil {
		return typeutil.ValidateSparseFloatRows(rows...)
	}

	msg := fmt.Sprintf("sparse float field '%v' is illegal, array type mismatch", field.GetFieldName())
	return merr.WrapErrParameterInvalid("need sparse float array", "got nil", msg)
}
2023-03-24 17:09:58 +08:00
func ( v * validateUtil ) checkVarCharFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
strArr := field . GetScalars ( ) . GetStringData ( ) . GetData ( )
2024-05-16 11:57:35 +08:00
if strArr == nil && fieldSchema . GetDefaultValue ( ) == nil && ! fieldSchema . GetNullable ( ) {
2023-03-24 17:09:58 +08:00
msg := fmt . Sprintf ( "varchar field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need string array" , "got nil" , msg )
}
2023-07-21 11:30:59 +08:00
// fieldSchema autoID is true means that field is pk and primaryData is auto generated
// no need to do max length check
// ignore the parameter of MaxLength
// related https://github.com/milvus-io/milvus/issues/25580
if v . checkMaxLen && ! fieldSchema . AutoID {
2023-06-15 10:24:38 +08:00
maxLength , err := parameterutil . GetMaxLength ( fieldSchema )
2023-03-24 17:09:58 +08:00
if err != nil {
return err
}
2024-04-12 15:01:19 +08:00
if i , ok := verifyLengthPerRow ( strArr , maxLength ) ; ! ok {
2024-07-01 21:16:07 +08:00
return merr . WrapErrParameterInvalidMsg ( "length of varchar field %s exceeds max length, row number: %d, length: %d, max length: %d" ,
fieldSchema . GetName ( ) , i , len ( strArr [ i ] ) , maxLength )
2024-04-12 15:01:19 +08:00
}
return nil
2023-03-24 17:09:58 +08:00
}
return nil
}
2023-05-12 11:33:20 +08:00
func ( v * validateUtil ) checkJSONFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
jsonArray := field . GetScalars ( ) . GetJsonData ( ) . GetData ( )
if jsonArray == nil {
2023-08-09 20:33:15 +08:00
msg := fmt . Sprintf ( "json field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
2023-10-19 17:24:07 +08:00
return merr . WrapErrParameterInvalid ( "need json array" , "got nil" , msg )
2023-05-12 11:33:20 +08:00
}
if v . checkMaxLen {
2023-09-04 17:47:48 +08:00
for _ , s := range jsonArray {
if int64 ( len ( s ) ) > paramtable . Get ( ) . CommonCfg . JSONMaxLength . GetAsInt64 ( ) {
if field . GetIsDynamic ( ) {
msg := fmt . Sprintf ( "the length (%d) of dynamic field exceeds max length (%d)" , len ( s ) ,
paramtable . Get ( ) . CommonCfg . JSONMaxLength . GetAsInt64 ( ) )
return merr . WrapErrParameterInvalid ( "valid length dynamic field" , "length exceeds max length" , msg )
}
msg := fmt . Sprintf ( "the length (%d) of json field (%s) exceeds max length (%d)" , len ( s ) ,
field . GetFieldName ( ) , paramtable . Get ( ) . CommonCfg . JSONMaxLength . GetAsInt64 ( ) )
return merr . WrapErrParameterInvalid ( "valid length json string" , "length exceeds max length" , msg )
}
}
2023-05-12 11:33:20 +08:00
}
2024-03-12 12:45:03 +08:00
if fieldSchema . GetIsDynamic ( ) {
var jsonMap map [ string ] interface { }
for _ , data := range jsonArray {
err := json . Unmarshal ( data , & jsonMap )
if err != nil {
log . Warn ( "insert invalid JSON data, milvus only support json map without nesting" ,
zap . ByteString ( "data" , data ) ,
zap . Error ( err ) ,
)
return merr . WrapErrIoFailedReason ( err . Error ( ) )
}
}
}
2023-05-12 11:33:20 +08:00
return nil
}
2023-05-16 20:19:22 +08:00
func ( v * validateUtil ) checkIntegerFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
data := field . GetScalars ( ) . GetIntData ( ) . GetData ( )
2024-05-16 11:57:35 +08:00
if data == nil && fieldSchema . GetDefaultValue ( ) == nil && ! fieldSchema . GetNullable ( ) {
2023-05-16 20:19:22 +08:00
msg := fmt . Sprintf ( "field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need int array" , "got nil" , msg )
}
2024-05-06 21:15:29 +08:00
if v . checkOverflow {
switch fieldSchema . GetDataType ( ) {
case schemapb . DataType_Int8 :
return verifyOverflowByRange ( data , math . MinInt8 , math . MaxInt8 )
case schemapb . DataType_Int16 :
return verifyOverflowByRange ( data , math . MinInt16 , math . MaxInt16 )
}
}
return nil
}
func ( v * validateUtil ) checkLongFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
data := field . GetScalars ( ) . GetLongData ( ) . GetData ( )
2024-05-16 11:57:35 +08:00
if data == nil && fieldSchema . GetDefaultValue ( ) == nil && ! fieldSchema . GetNullable ( ) {
2024-05-06 21:15:29 +08:00
msg := fmt . Sprintf ( "field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need long int array" , "got nil" , msg )
}
return nil
}
func ( v * validateUtil ) checkFloatFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
data := field . GetScalars ( ) . GetFloatData ( ) . GetData ( )
2024-05-16 11:57:35 +08:00
if data == nil && fieldSchema . GetDefaultValue ( ) == nil && ! fieldSchema . GetNullable ( ) {
2024-05-06 21:15:29 +08:00
msg := fmt . Sprintf ( "field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need float32 array" , "got nil" , msg )
}
if v . checkNAN {
return typeutil . VerifyFloats32 ( data )
}
return nil
}
func ( v * validateUtil ) checkDoubleFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
data := field . GetScalars ( ) . GetDoubleData ( ) . GetData ( )
2024-05-16 11:57:35 +08:00
if data == nil && fieldSchema . GetDefaultValue ( ) == nil && ! fieldSchema . GetNullable ( ) {
2024-05-06 21:15:29 +08:00
msg := fmt . Sprintf ( "field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
return merr . WrapErrParameterInvalid ( "need float64(double) array" , "got nil" , msg )
}
if v . checkNAN {
return typeutil . VerifyFloats64 ( data )
2023-05-16 20:19:22 +08:00
}
return nil
}
2023-09-28 19:01:27 +08:00
func ( v * validateUtil ) checkArrayElement ( array * schemapb . ArrayArray , field * schemapb . FieldSchema ) error {
switch field . GetElementType ( ) {
case schemapb . DataType_Bool :
for _ , row := range array . GetData ( ) {
2024-05-17 17:29:37 +08:00
if row . GetData ( ) == nil {
return merr . WrapErrParameterInvalid ( "bool array" , "nil array" , "insert data does not match" )
}
2023-09-28 19:01:27 +08:00
actualType := reflect . TypeOf ( row . GetData ( ) )
if actualType != reflect . TypeOf ( ( * schemapb . ScalarField_BoolData ) ( nil ) ) {
return merr . WrapErrParameterInvalid ( "bool array" ,
fmt . Sprintf ( "%s array" , actualType . String ( ) ) , "insert data does not match" )
}
}
case schemapb . DataType_Int8 , schemapb . DataType_Int16 , schemapb . DataType_Int32 :
for _ , row := range array . GetData ( ) {
2024-05-17 17:29:37 +08:00
if row . GetData ( ) == nil {
return merr . WrapErrParameterInvalid ( "int array" , "nil array" , "insert data does not match" )
}
2023-09-28 19:01:27 +08:00
actualType := reflect . TypeOf ( row . GetData ( ) )
if actualType != reflect . TypeOf ( ( * schemapb . ScalarField_IntData ) ( nil ) ) {
return merr . WrapErrParameterInvalid ( "int array" ,
fmt . Sprintf ( "%s array" , actualType . String ( ) ) , "insert data does not match" )
}
if v . checkOverflow {
if field . GetElementType ( ) == schemapb . DataType_Int8 {
if err := verifyOverflowByRange ( row . GetIntData ( ) . GetData ( ) , math . MinInt8 , math . MaxInt8 ) ; err != nil {
return err
}
}
if field . GetElementType ( ) == schemapb . DataType_Int16 {
if err := verifyOverflowByRange ( row . GetIntData ( ) . GetData ( ) , math . MinInt16 , math . MaxInt16 ) ; err != nil {
return err
}
}
}
}
case schemapb . DataType_Int64 :
for _ , row := range array . GetData ( ) {
2024-05-17 17:29:37 +08:00
if row . GetData ( ) == nil {
return merr . WrapErrParameterInvalid ( "int64 array" , "nil array" , "insert data does not match" )
}
2023-09-28 19:01:27 +08:00
actualType := reflect . TypeOf ( row . GetData ( ) )
if actualType != reflect . TypeOf ( ( * schemapb . ScalarField_LongData ) ( nil ) ) {
return merr . WrapErrParameterInvalid ( "int64 array" ,
fmt . Sprintf ( "%s array" , actualType . String ( ) ) , "insert data does not match" )
}
}
case schemapb . DataType_Float :
for _ , row := range array . GetData ( ) {
2024-05-17 17:29:37 +08:00
if row . GetData ( ) == nil {
return merr . WrapErrParameterInvalid ( "float array" , "nil array" , "insert data does not match" )
}
2023-09-28 19:01:27 +08:00
actualType := reflect . TypeOf ( row . GetData ( ) )
if actualType != reflect . TypeOf ( ( * schemapb . ScalarField_FloatData ) ( nil ) ) {
return merr . WrapErrParameterInvalid ( "float array" ,
fmt . Sprintf ( "%s array" , actualType . String ( ) ) , "insert data does not match" )
}
}
case schemapb . DataType_Double :
for _ , row := range array . GetData ( ) {
2024-05-17 17:29:37 +08:00
if row . GetData ( ) == nil {
return merr . WrapErrParameterInvalid ( "double array" , "nil array" , "insert data does not match" )
}
2023-09-28 19:01:27 +08:00
actualType := reflect . TypeOf ( row . GetData ( ) )
if actualType != reflect . TypeOf ( ( * schemapb . ScalarField_DoubleData ) ( nil ) ) {
return merr . WrapErrParameterInvalid ( "double array" ,
fmt . Sprintf ( "%s array" , actualType . String ( ) ) , "insert data does not match" )
}
}
case schemapb . DataType_VarChar , schemapb . DataType_String :
for _ , row := range array . GetData ( ) {
2024-05-17 17:29:37 +08:00
if row . GetData ( ) == nil {
return merr . WrapErrParameterInvalid ( "string array" , "nil array" , "insert data does not match" )
}
2023-09-28 19:01:27 +08:00
actualType := reflect . TypeOf ( row . GetData ( ) )
if actualType != reflect . TypeOf ( ( * schemapb . ScalarField_StringData ) ( nil ) ) {
return merr . WrapErrParameterInvalid ( "string array" ,
fmt . Sprintf ( "%s array" , actualType . String ( ) ) , "insert data does not match" )
}
}
}
return nil
}
2023-09-19 14:23:23 +08:00
func ( v * validateUtil ) checkArrayFieldData ( field * schemapb . FieldData , fieldSchema * schemapb . FieldSchema ) error {
data := field . GetScalars ( ) . GetArrayData ( )
if data == nil {
2023-10-19 17:24:07 +08:00
elementTypeStr := fieldSchema . GetElementType ( ) . String ( )
2023-09-19 14:23:23 +08:00
msg := fmt . Sprintf ( "array field '%v' is illegal, array type mismatch" , field . GetFieldName ( ) )
2023-10-19 17:24:07 +08:00
expectStr := fmt . Sprintf ( "need %s array" , elementTypeStr )
return merr . WrapErrParameterInvalid ( expectStr , "got nil" , msg )
2023-09-19 14:23:23 +08:00
}
if v . checkMaxCap {
maxCapacity , err := parameterutil . GetMaxCapacity ( fieldSchema )
if err != nil {
return err
}
if err := verifyCapacityPerRow ( data . GetData ( ) , maxCapacity , fieldSchema . GetElementType ( ) ) ; err != nil {
return err
}
}
if typeutil . IsStringType ( data . GetElementType ( ) ) && v . checkMaxLen {
maxLength , err := parameterutil . GetMaxLength ( fieldSchema )
if err != nil {
return err
}
2024-04-12 15:01:19 +08:00
for rowCnt , row := range data . GetData ( ) {
if i , ok := verifyLengthPerRow ( row . GetStringData ( ) . GetData ( ) , maxLength ) ; ! ok {
2024-07-01 21:16:07 +08:00
return merr . WrapErrParameterInvalidMsg ( "length of %s array field \"%s\" exceeds max length, row number: %d, array index: %d, length: %d, max length: %d" ,
fieldSchema . GetDataType ( ) . String ( ) , fieldSchema . GetName ( ) , rowCnt , i , len ( row . GetStringData ( ) . GetData ( ) [ i ] ) , maxLength ,
)
2023-09-19 14:23:23 +08:00
}
}
}
2023-09-28 19:01:27 +08:00
return v . checkArrayElement ( data , fieldSchema )
2023-09-19 14:23:23 +08:00
}
2024-04-12 15:01:19 +08:00
// verifyLengthPerRow scans strArr for the first element whose byte length
// is greater than maxLength.
//
// It returns (index, false) for the first offending element, or (0, true)
// when every element fits (including when strArr is empty or nil).
func verifyLengthPerRow[E interface{ ~string | ~[]byte }](strArr []E, maxLength int64) (int, bool) {
	for idx := 0; idx < len(strArr); idx++ {
		if int64(len(strArr[idx])) <= maxLength {
			continue
		}
		return idx, false
	}
	return 0, true
}
2023-09-19 14:23:23 +08:00
func verifyCapacityPerRow ( arrayArray [ ] * schemapb . ScalarField , maxCapacity int64 , elementType schemapb . DataType ) error {
for i , array := range arrayArray {
2023-10-19 17:24:07 +08:00
arrayLen := 0
2023-09-19 14:23:23 +08:00
switch elementType {
case schemapb . DataType_Bool :
2023-10-19 17:24:07 +08:00
arrayLen = len ( array . GetBoolData ( ) . GetData ( ) )
2023-09-19 14:23:23 +08:00
case schemapb . DataType_Int8 , schemapb . DataType_Int16 , schemapb . DataType_Int32 :
2023-10-19 17:24:07 +08:00
arrayLen = len ( array . GetIntData ( ) . GetData ( ) )
2023-09-19 14:23:23 +08:00
case schemapb . DataType_Int64 :
2023-10-19 17:24:07 +08:00
arrayLen = len ( array . GetLongData ( ) . GetData ( ) )
2023-09-19 14:23:23 +08:00
case schemapb . DataType_String , schemapb . DataType_VarChar :
2023-10-19 17:24:07 +08:00
arrayLen = len ( array . GetStringData ( ) . GetData ( ) )
2023-09-19 14:23:23 +08:00
case schemapb . DataType_Float :
2023-10-19 17:24:07 +08:00
arrayLen = len ( array . GetFloatData ( ) . GetData ( ) )
2023-09-19 14:23:23 +08:00
case schemapb . DataType_Double :
2023-10-19 17:24:07 +08:00
arrayLen = len ( array . GetDoubleData ( ) . GetData ( ) )
2023-09-19 14:23:23 +08:00
default :
msg := fmt . Sprintf ( "array element type: %s is not supported" , elementType . String ( ) )
return merr . WrapErrParameterInvalid ( "valid array element type" , "array element type is not supported" , msg )
}
2023-10-19 17:24:07 +08:00
if int64 ( arrayLen ) <= maxCapacity {
continue
}
msg := fmt . Sprintf ( "the length (%d) of %dth array exceeds max capacity (%d)" , arrayLen , i , maxCapacity )
2023-09-19 14:23:23 +08:00
return merr . WrapErrParameterInvalid ( "valid length array" , "array length exceeds max capacity" , msg )
}
return nil
}
2023-05-16 20:19:22 +08:00
// verifyOverflowByRange ensures every element of arr lies within the
// inclusive range [lb, ub].
//
// It returns a parameter-invalid error describing the first out-of-range
// element (its index, value and the allowed range), or nil if all fit.
func verifyOverflowByRange(arr []int32, lb int64, ub int64) error {
	for pos := 0; pos < len(arr); pos++ {
		widened := int64(arr[pos])
		if lb <= widened && widened <= ub {
			continue
		}
		return merr.WrapErrParameterInvalid(
			"integer doesn't overflow",
			"out of range",
			fmt.Sprintf("the %dth element (%d) out of range: [%d, %d]", pos, arr[pos], lb, ub),
		)
	}
	return nil
}
2023-03-24 17:09:58 +08:00
func newValidateUtil ( opts ... validateOption ) * validateUtil {
v := & validateUtil {
2023-05-16 20:19:22 +08:00
checkNAN : true ,
checkMaxLen : false ,
checkOverflow : false ,
2023-03-24 17:09:58 +08:00
}
v . apply ( opts ... )
return v
}
2024-08-21 00:22:54 +08:00
// ValidateAutoIndexMmapConfig rejects an explicit mmap setting on a vector
// field while auto-index mode is enabled.
//
// It returns an error only when all of the following hold: auto index is
// enabled in Params, isVectorField is true, and indexParams contains the
// common.MmapEnabledKey key (whatever its value). Otherwise it returns nil.
func ValidateAutoIndexMmapConfig(isVectorField bool, indexParams map[string]string) error {
	autoIndexOn := Params.AutoIndexConfig.Enable.GetAsBool()
	if !autoIndexOn {
		return nil
	}
	if !isVectorField {
		return nil
	}
	if _, hasMmapKey := indexParams[common.MmapEnabledKey]; hasMmapKey {
		return fmt.Errorf("mmap index is not supported to config for the collection in auto index mode")
	}
	return nil
}