enhance: autoindex for multi data type (#33868)

issue: #22837 

Contains https://github.com/milvus-io/milvus/pull/33625,
https://github.com/milvus-io/milvus/pull/33867, and
https://github.com/milvus-io/milvus/pull/33911, which have already been merged to
the 2.4 branch.

Signed-off-by: chasingegg <chao.gao@zilliz.com>
Co-authored-by: foxspy <xianliang.li@zilliz.com>
This commit is contained in:
Gao 2024-06-18 21:34:01 +08:00 committed by GitHub
parent 74186091d4
commit a789c60380
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
16 changed files with 410 additions and 61 deletions

View File

@ -177,9 +177,21 @@ func (cit *createIndexTask) parseIndexParams() error {
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey] metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
// override params by autoindex if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() { // override float vector index params by autoindex
indexParamsMap[k] = v for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
// override sparse float vector index params by autoindex
for k, v := range Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
// override binary vector index params by autoindex
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
indexParamsMap[k] = v
}
} }
if metricTypeExist { if metricTypeExist {
@ -187,9 +199,7 @@ func (cit *createIndexTask) parseIndexParams() error {
indexParamsMap[common.MetricTypeKey] = metricType indexParamsMap[common.MetricTypeKey] = metricType
} }
} else { // behavior change after 2.2.9, adapt autoindex logic here. } else { // behavior change after 2.2.9, adapt autoindex logic here.
autoIndexConfig := Params.AutoIndexConfig.IndexParams.GetAsJSONMap() useAutoIndex := func(autoIndexConfig map[string]string) {
useAutoIndex := func() {
fields := make([]zap.Field, 0, len(autoIndexConfig)) fields := make([]zap.Field, 0, len(autoIndexConfig))
for k, v := range autoIndexConfig { for k, v := range autoIndexConfig {
indexParamsMap[k] = v indexParamsMap[k] = v
@ -198,13 +208,13 @@ func (cit *createIndexTask) parseIndexParams() error {
log.Ctx(cit.ctx).Info("AutoIndex triggered", fields...) log.Ctx(cit.ctx).Info("AutoIndex triggered", fields...)
} }
handle := func(numberParams int) error { handle := func(numberParams int, autoIndexConfig map[string]string) error {
// empty case. // empty case.
if len(indexParamsMap) == numberParams { if len(indexParamsMap) == numberParams {
// though we already know there must be metric type, how to make this safer to avoid crash? // though we already know there must be metric type, how to make this safer to avoid crash?
metricType := autoIndexConfig[common.MetricTypeKey] metricType := autoIndexConfig[common.MetricTypeKey]
cit.newExtraParams = wrapUserIndexParams(metricType) cit.newExtraParams = wrapUserIndexParams(metricType)
useAutoIndex() useAutoIndex(autoIndexConfig)
return nil return nil
} }
@ -221,7 +231,7 @@ func (cit *createIndexTask) parseIndexParams() error {
// only metric type is passed. // only metric type is passed.
cit.newExtraParams = wrapUserIndexParams(metricType) cit.newExtraParams = wrapUserIndexParams(metricType)
useAutoIndex() useAutoIndex(autoIndexConfig)
// make the users' metric type first class citizen. // make the users' metric type first class citizen.
indexParamsMap[common.MetricTypeKey] = metricType indexParamsMap[common.MetricTypeKey] = metricType
} }
@ -229,12 +239,23 @@ func (cit *createIndexTask) parseIndexParams() error {
return nil return nil
} }
var config map[string]string
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
// override float vector index params by autoindex
config = Params.AutoIndexConfig.IndexParams.GetAsJSONMap()
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
// override sparse float vector index params by autoindex
config = Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
// override binary vector index params by autoindex
config = Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
}
if !exist { if !exist {
if err := handle(0); err != nil { if err := handle(0, config); err != nil {
return err return err
} }
} else if specifyIndexType == AutoIndexName { } else if specifyIndexType == AutoIndexName {
if err := handle(1); err != nil { if err := handle(1, config); err != nil {
return err return err
} }
} }
@ -250,10 +271,21 @@ func (cit *createIndexTask) parseIndexParams() error {
return err return err
} }
} }
if indexType == indexparamcheck.IndexSparseInverted || indexType == indexparamcheck.IndexSparseWand { metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey] if !metricTypeExist {
if !metricTypeExist || metricType != metric.IP { return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "metric type not set for vector index")
return fmt.Errorf("only IP is the supported metric type for sparse index") }
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.FloatVectorMetrics, metricType) {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "float vector index does not support metric type: "+metricType)
}
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
if metricType != metric.IP {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "only IP is the supported metric type for sparse index")
}
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
if !funcutil.SliceContain(indexparamcheck.BinaryVectorMetrics, metricType) {
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "binary vector index does not support metric type: "+metricType)
} }
} }
} }

View File

@ -963,14 +963,109 @@ func Test_wrapUserIndexParams(t *testing.T) {
assert.Equal(t, "L2", params[1].Value) assert.Equal(t, "L2", params[1].Value)
} }
// Test_parseIndexParams_AutoIndex_WithType enables autoindex, configures a
// separate build-parameter set for dense float, sparse float and binary
// vectors, and checks that parseIndexParams produces the set matching the
// field's data type while keeping the metric type passed in the request.
func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) {
	paramtable.Init()
	mgr := config.NewManager()
	mgr.SetConfig("autoIndex.enable", "true")
	Params.AutoIndexConfig.Enable.Init(mgr)

	mgr.SetConfig("autoIndex.params.build", `{"M": 30,"efConstruction": 360,"index_type": "HNSW"}`)
	mgr.SetConfig("autoIndex.params.sparse.build", `{"drop_ratio_build": 0.2, "index_type": "SPARSE_INVERTED_INDEX"}`)
	mgr.SetConfig("autoIndex.params.binary.build", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT"}`)
	Params.AutoIndexConfig.IndexParams.Init(mgr)
	Params.AutoIndexConfig.SparseIndexParams.Init(mgr)
	Params.AutoIndexConfig.BinaryIndexParams.Init(mgr)

	// vectorField builds a field schema of the given vector type whose only
	// type parameter is the dimension.
	vectorField := func(dataType schemapb.DataType, dim string) *schemapb.FieldSchema {
		return &schemapb.FieldSchema{
			DataType: dataType,
			TypeParams: []*commonpb.KeyValuePair{
				{Key: common.DimKey, Value: dim},
			},
		}
	}

	// metricOnlyTask builds a create-index task whose request carries nothing
	// but the metric type.
	metricOnlyTask := func(schema *schemapb.FieldSchema, metricType string) *createIndexTask {
		return &createIndexTask{
			fieldSchema: schema,
			req: &milvuspb.CreateIndexRequest{
				ExtraParams: []*commonpb.KeyValuePair{
					{Key: common.MetricTypeKey, Value: metricType},
				},
			},
		}
	}

	floatFieldSchema := vectorField(schemapb.DataType_FloatVector, "128")
	sparseFloatFieldSchema := vectorField(schemapb.DataType_SparseFloatVector, "64")
	binaryFieldSchema := vectorField(schemapb.DataType_BinaryVector, "4096")

	t.Run("case 1, float vector parameters", func(t *testing.T) {
		task := metricOnlyTask(floatFieldSchema, "L2")
		assert.NoError(t, task.parseIndexParams())

		want := []*commonpb.KeyValuePair{
			{Key: common.IndexTypeKey, Value: "HNSW"},
			{Key: common.MetricTypeKey, Value: "L2"},
			{Key: "M", Value: "30"},
			{Key: "efConstruction", Value: "360"},
		}
		assert.ElementsMatch(t, want, task.newIndexParams)
	})

	t.Run("case 2, sparse vector parameters", func(t *testing.T) {
		Params.AutoIndexConfig.IndexParams.Init(mgr)
		task := metricOnlyTask(sparseFloatFieldSchema, "IP")
		assert.NoError(t, task.parseIndexParams())

		want := []*commonpb.KeyValuePair{
			{Key: common.IndexTypeKey, Value: "SPARSE_INVERTED_INDEX"},
			{Key: common.MetricTypeKey, Value: "IP"},
			{Key: "drop_ratio_build", Value: "0.2"},
		}
		assert.ElementsMatch(t, want, task.newIndexParams)
	})

	t.Run("case 3, binary vector parameters", func(t *testing.T) {
		task := metricOnlyTask(binaryFieldSchema, "JACCARD")
		assert.NoError(t, task.parseIndexParams())

		want := []*commonpb.KeyValuePair{
			{Key: common.IndexTypeKey, Value: "BIN_IVF_FLAT"},
			{Key: common.MetricTypeKey, Value: "JACCARD"},
			{Key: "nlist", Value: "1024"},
		}
		assert.ElementsMatch(t, want, task.newIndexParams)
	})
}
func Test_parseIndexParams_AutoIndex(t *testing.T) { func Test_parseIndexParams_AutoIndex(t *testing.T) {
paramtable.Init() paramtable.Init()
mgr := config.NewManager() mgr := config.NewManager()
mgr.SetConfig("autoIndex.enable", "false") mgr.SetConfig("autoIndex.enable", "false")
mgr.SetConfig("autoIndex.params.build", `{"M": 30,"efConstruction": 360,"index_type": "HNSW", "metric_type": "IP"}`) mgr.SetConfig("autoIndex.params.build", `{"M": 30,"efConstruction": 360,"index_type": "HNSW", "metric_type": "IP"}`)
mgr.SetConfig("autoIndex.params.binary.build", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD"}`)
mgr.SetConfig("autoIndex.params.sparse.build", `{"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}`)
Params.AutoIndexConfig.Enable.Init(mgr) Params.AutoIndexConfig.Enable.Init(mgr)
Params.AutoIndexConfig.IndexParams.Init(mgr) Params.AutoIndexConfig.IndexParams.Init(mgr)
Params.AutoIndexConfig.BinaryIndexParams.Init(mgr)
Params.AutoIndexConfig.SparseIndexParams.Init(mgr)
autoIndexConfig := Params.AutoIndexConfig.IndexParams.GetAsJSONMap() autoIndexConfig := Params.AutoIndexConfig.IndexParams.GetAsJSONMap()
autoIndexConfigBinary := Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
autoIndexConfigSparse := Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()
fieldSchema := &schemapb.FieldSchema{ fieldSchema := &schemapb.FieldSchema{
DataType: schemapb.DataType_FloatVector, DataType: schemapb.DataType_FloatVector,
TypeParams: []*commonpb.KeyValuePair{ TypeParams: []*commonpb.KeyValuePair{
@ -978,7 +1073,48 @@ func Test_parseIndexParams_AutoIndex(t *testing.T) {
}, },
} }
t.Run("case 1, empty parameters", func(t *testing.T) { fieldSchemaBinary := &schemapb.FieldSchema{
DataType: schemapb.DataType_BinaryVector,
TypeParams: []*commonpb.KeyValuePair{
{Key: common.DimKey, Value: "8"},
},
}
fieldSchemaSparse := &schemapb.FieldSchema{
DataType: schemapb.DataType_SparseFloatVector,
}
t.Run("case 1, empty parameters binary", func(t *testing.T) {
task := &createIndexTask{
fieldSchema: fieldSchemaBinary,
req: &milvuspb.CreateIndexRequest{
ExtraParams: make([]*commonpb.KeyValuePair, 0),
},
}
err := task.parseIndexParams()
assert.NoError(t, err)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: autoIndexConfigBinary[common.MetricTypeKey]},
}, task.newExtraParams)
})
t.Run("case 1, empty parameters sparse", func(t *testing.T) {
task := &createIndexTask{
fieldSchema: fieldSchemaSparse,
req: &milvuspb.CreateIndexRequest{
ExtraParams: make([]*commonpb.KeyValuePair, 0),
},
}
err := task.parseIndexParams()
assert.NoError(t, err)
assert.ElementsMatch(t, []*commonpb.KeyValuePair{
{Key: common.IndexTypeKey, Value: AutoIndexName},
{Key: common.MetricTypeKey, Value: autoIndexConfigSparse[common.MetricTypeKey]},
}, task.newExtraParams)
})
t.Run("case 1, empty parameters float vector", func(t *testing.T) {
task := &createIndexTask{ task := &createIndexTask{
fieldSchema: fieldSchema, fieldSchema: fieldSchema,
req: &milvuspb.CreateIndexRequest{ req: &milvuspb.CreateIndexRequest{

View File

@ -62,6 +62,7 @@ func OptimizeSearchParams(ctx context.Context, req *querypb.SearchRequest, query
common.SearchParamKey: queryInfo.GetSearchParams(), common.SearchParamKey: queryInfo.GetSearchParams(),
common.SegmentNumKey: estSegmentNum, common.SegmentNumKey: estSegmentNum,
common.WithFilterKey: withFilter, common.WithFilterKey: withFilter,
common.DataTypeKey: int32(plan.GetVectorAnns().GetVectorType()),
common.WithOptimizeKey: paramtable.Get().AutoIndexConfig.EnableOptimize.GetAsBool(), common.WithOptimizeKey: paramtable.Get().AutoIndexConfig.EnableOptimize.GetAsBool(),
common.CollectionKey: req.GetReq().GetCollectionID(), common.CollectionKey: req.GetReq().GetCollectionID(),
} }

View File

@ -106,6 +106,7 @@ const (
SearchParamKey = "search_param" SearchParamKey = "search_param"
SegmentNumKey = "segment_num" SegmentNumKey = "segment_num"
WithFilterKey = "with_filter" WithFilterKey = "with_filter"
DataTypeKey = "data_type"
WithOptimizeKey = "with_optimize" WithOptimizeKey = "with_optimize"
CollectionKey = "collection" CollectionKey = "collection"

View File

@ -40,7 +40,7 @@ func (c baseChecker) CheckValidDataType(dType schemapb.DataType) error {
return nil return nil
} }
func (c baseChecker) SetDefaultMetricTypeIfNotExist(m map[string]string) {} func (c baseChecker) SetDefaultMetricTypeIfNotExist(m map[string]string, dType schemapb.DataType) {}
func (c baseChecker) StaticCheck(params map[string]string) error { func (c baseChecker) StaticCheck(params map[string]string) error {
return errors.New("unsupported index type") return errors.New("unsupported index type")

View File

@ -34,7 +34,7 @@ func (c binaryVectorBaseChecker) CheckValidDataType(dType schemapb.DataType) err
return nil return nil
} }
func (c binaryVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) { func (c binaryVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, BinaryVectorDefaultMetricType) setDefaultIfNotExist(params, common.MetricTypeKey, BinaryVectorDefaultMetricType)
} }

View File

@ -46,8 +46,10 @@ const (
SparseDropRatioBuild = "drop_ratio_build" SparseDropRatioBuild = "drop_ratio_build"
) )
// METRICS is a set of all metrics types supported for float vector. var (
var METRICS = []string{metric.L2, metric.IP, metric.COSINE} // const FloatVectorMetrics = []string{metric.L2, metric.IP, metric.COSINE} // const
BinaryVectorMetrics = []string{metric.HAMMING, metric.JACCARD, metric.SUBSTRUCTURE, metric.SUPERSTRUCTURE} // const
)
// BinIDMapMetrics is a set of all metric types supported for binary vector. // BinIDMapMetrics is a set of all metric types supported for binary vector.
var ( var (

View File

@ -13,8 +13,8 @@ type floatVectorBaseChecker struct {
} }
func (c floatVectorBaseChecker) staticCheck(params map[string]string) error { func (c floatVectorBaseChecker) staticCheck(params map[string]string) error {
if !CheckStrByValues(params, Metric, METRICS) { if !CheckStrByValues(params, Metric, FloatVectorMetrics) {
return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], METRICS) return fmt.Errorf("metric type %s not found or not supported, supported: %v", params[Metric], FloatVectorMetrics)
} }
return nil return nil
@ -35,7 +35,7 @@ func (c floatVectorBaseChecker) CheckValidDataType(dType schemapb.DataType) erro
return nil return nil
} }
func (c floatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) { func (c floatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, FloatVectorDefaultMetricType) setDefaultIfNotExist(params, common.MetricTypeKey, FloatVectorDefaultMetricType)
} }

View File

@ -4,11 +4,12 @@ import (
"fmt" "fmt"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb" "github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/util/typeutil" "github.com/milvus-io/milvus/pkg/util/typeutil"
) )
type hnswChecker struct { type hnswChecker struct {
floatVectorBaseChecker baseChecker
} }
func (c hnswChecker) StaticCheck(params map[string]string) error { func (c hnswChecker) StaticCheck(params map[string]string) error {
@ -38,6 +39,16 @@ func (c hnswChecker) CheckValidDataType(dType schemapb.DataType) error {
return nil return nil
} }
// SetDefaultMetricTypeIfNotExist calls setDefaultIfNotExist on params for
// common.MetricTypeKey, passing the default metric that matches dType: the
// dense-float, sparse-float or binary default. For any other data type params
// is left untouched.
func (c hnswChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
	switch {
	case typeutil.IsDenseFloatVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, FloatVectorDefaultMetricType)
	case typeutil.IsSparseFloatVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, SparseFloatVectorDefaultMetricType)
	case typeutil.IsBinaryVectorType(dType):
		setDefaultIfNotExist(params, common.MetricTypeKey, BinaryVectorDefaultMetricType)
	}
}
func newHnswChecker() IndexChecker { func newHnswChecker() IndexChecker {
return &hnswChecker{} return &hnswChecker{}
} }

View File

@ -172,3 +172,42 @@ func Test_hnswChecker_CheckValidDataType(t *testing.T) {
} }
} }
} }
// Test_hnswChecker_SetDefaultMetricType checks that the HNSW checker fills in
// the expected default metric type for each supported vector data type when
// the params map carries no metric type of its own.
func Test_hnswChecker_SetDefaultMetricType(t *testing.T) {
	cases := []struct {
		dType      schemapb.DataType
		metricType string
	}{
		{
			dType:      schemapb.DataType_FloatVector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_Float16Vector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_BFloat16Vector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_SparseFloatVector,
			metricType: metric.IP,
		},
		{
			dType:      schemapb.DataType_BinaryVector,
			metricType: metric.JACCARD,
		},
	}

	c := newHnswChecker()
	for _, test := range cases {
		// A fresh map per case, deliberately without a metric type entry.
		p := map[string]string{
			DIM:            strconv.Itoa(128),
			HNSWM:          strconv.Itoa(16),
			EFConstruction: strconv.Itoa(200),
		}
		c.SetDefaultMetricTypeIfNotExist(p, test.dType)
		// testify expects (expected, actual); the data type is appended so a
		// failure identifies which case broke.
		assert.Equal(t, test.metricType, p[Metric], "dType=%v", test.dType)
	}
}

View File

@ -23,6 +23,6 @@ import (
type IndexChecker interface { type IndexChecker interface {
CheckTrain(map[string]string) error CheckTrain(map[string]string) error
CheckValidDataType(dType schemapb.DataType) error CheckValidDataType(dType schemapb.DataType) error
SetDefaultMetricTypeIfNotExist(map[string]string) SetDefaultMetricTypeIfNotExist(map[string]string, schemapb.DataType)
StaticCheck(map[string]string) error StaticCheck(map[string]string) error
} }

View File

@ -39,7 +39,7 @@ func (c sparseFloatVectorBaseChecker) CheckValidDataType(dType schemapb.DataType
return nil return nil
} }
func (c sparseFloatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string) { func (c sparseFloatVectorBaseChecker) SetDefaultMetricTypeIfNotExist(params map[string]string, dType schemapb.DataType) {
setDefaultIfNotExist(params, common.MetricTypeKey, SparseFloatVectorDefaultMetricType) setDefaultIfNotExist(params, common.MetricTypeKey, SparseFloatVectorDefaultMetricType)
} }

View File

@ -362,6 +362,10 @@ func AppendPrepareLoadParams(params *paramtable.ComponentParam, indexParams map[
for k, v := range params.AutoIndexConfig.PrepareParams.GetAsJSONMap() { for k, v := range params.AutoIndexConfig.PrepareParams.GetAsJSONMap() {
indexParams[k] = v indexParams[k] = v
} }
for k, v := range params.AutoIndexConfig.LoadAdaptParams.GetAsJSONMap() {
indexParams[k] = v
}
} }
return nil return nil
} }

View File

@ -583,7 +583,7 @@ func TestBigDataIndex_parse(t *testing.T) {
} }
func TestAppendPrepareInfo_parse(t *testing.T) { func TestAppendPrepareInfo_parse(t *testing.T) {
t.Run("parse prepare info", func(t *testing.T) { t.Run("parse load info", func(t *testing.T) {
var params paramtable.ComponentParam var params paramtable.ComponentParam
params.Init(paramtable.NewBaseTable(paramtable.SkipRemote(true))) params.Init(paramtable.NewBaseTable(paramtable.SkipRemote(true)))
params.Save(params.AutoIndexConfig.Enable.Key, "true") params.Save(params.AutoIndexConfig.Enable.Key, "true")
@ -593,9 +593,16 @@ func TestAppendPrepareInfo_parse(t *testing.T) {
assert.NoError(t, err) assert.NoError(t, err)
params.Save(params.AutoIndexConfig.PrepareParams.Key, string(str)) params.Save(params.AutoIndexConfig.PrepareParams.Key, string(str))
mapString2 := make(map[string]string)
mapString2["key2"] = "value2"
str2, err2 := json.Marshal(mapString2)
assert.NoError(t, err2)
params.Save(params.AutoIndexConfig.LoadAdaptParams.Key, string(str2))
resultMapString := make(map[string]string) resultMapString := make(map[string]string)
err = AppendPrepareLoadParams(&params, resultMapString) err = AppendPrepareLoadParams(&params, resultMapString)
assert.NoError(t, err) assert.NoError(t, err)
assert.Equal(t, resultMapString["key1"], "value1") assert.Equal(t, resultMapString["key1"], "value1")
assert.Equal(t, resultMapString["key2"], "value2")
}) })
} }

View File

@ -19,6 +19,7 @@ package paramtable
import ( import (
"fmt" "fmt"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/config" "github.com/milvus-io/milvus/pkg/config"
"github.com/milvus-io/milvus/pkg/util/funcutil" "github.com/milvus-io/milvus/pkg/util/funcutil"
@ -32,7 +33,10 @@ type autoIndexConfig struct {
EnableOptimize ParamItem `refreshable:"true"` EnableOptimize ParamItem `refreshable:"true"`
IndexParams ParamItem `refreshable:"true"` IndexParams ParamItem `refreshable:"true"`
SparseIndexParams ParamItem `refreshable:"true"`
BinaryIndexParams ParamItem `refreshable:"true"`
PrepareParams ParamItem `refreshable:"true"` PrepareParams ParamItem `refreshable:"true"`
LoadAdaptParams ParamItem `refreshable:"true"`
ExtraParams ParamItem `refreshable:"true"` ExtraParams ParamItem `refreshable:"true"`
IndexType ParamItem `refreshable:"true"` IndexType ParamItem `refreshable:"true"`
AutoIndexTypeName ParamItem `refreshable:"true"` AutoIndexTypeName ParamItem `refreshable:"true"`
@ -71,12 +75,34 @@ func (p *autoIndexConfig) init(base *BaseTable) {
} }
p.IndexParams.Init(base.mgr) p.IndexParams.Init(base.mgr)
p.SparseIndexParams = ParamItem{
Key: "autoIndex.params.sparse.build",
Version: "2.4.5",
DefaultValue: `{"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}`,
Export: true,
}
p.SparseIndexParams.Init(base.mgr)
p.BinaryIndexParams = ParamItem{
Key: "autoIndex.params.binary.build",
Version: "2.4.5",
DefaultValue: `{"nlist": 1024, "index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD"}`,
Export: true,
}
p.BinaryIndexParams.Init(base.mgr)
p.PrepareParams = ParamItem{ p.PrepareParams = ParamItem{
Key: "autoIndex.params.prepare", Key: "autoIndex.params.prepare",
Version: "2.3.2", Version: "2.3.2",
} }
p.PrepareParams.Init(base.mgr) p.PrepareParams.Init(base.mgr)
p.LoadAdaptParams = ParamItem{
Key: "autoIndex.params.load",
Version: "2.4.5",
}
p.LoadAdaptParams.Init(base.mgr)
p.ExtraParams = ParamItem{ p.ExtraParams = ParamItem{
Key: "autoIndex.params.extra", Key: "autoIndex.params.extra",
Version: "2.2.0", Version: "2.2.0",
@ -168,31 +194,36 @@ func (p *autoIndexConfig) init(base *BaseTable) {
} }
func (p *autoIndexConfig) panicIfNotValidAndSetDefaultMetricType(mgr *config.Manager) { func (p *autoIndexConfig) panicIfNotValidAndSetDefaultMetricType(mgr *config.Manager) {
m := p.IndexParams.GetAsJSONMap() p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.BinaryIndexParams.Key, p.BinaryIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector, mgr)
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.SparseIndexParams.Key, p.SparseIndexParams.GetAsJSONMap(), schemapb.DataType_SparseFloatVector, mgr)
}
func (p *autoIndexConfig) panicIfNotValidAndSetDefaultMetricTypeHelper(key string, m map[string]string, dtype schemapb.DataType, mgr *config.Manager) {
if m == nil { if m == nil {
panic("autoIndex.build not invalid, should be json format") panic(fmt.Sprintf("%s invalid, should be json format", key))
} }
indexType, ok := m[common.IndexTypeKey] indexType, ok := m[common.IndexTypeKey]
if !ok { if !ok {
panic("autoIndex.build not invalid, index type not found") panic(fmt.Sprintf("%s invalid, index type not found", key))
} }
checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType) checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType)
if err != nil { if err != nil {
panic(fmt.Sprintf("autoIndex.build not invalid, unsupported index type: %s", indexType)) panic(fmt.Sprintf("%s invalid, unsupported index type: %s", key, indexType))
} }
checker.SetDefaultMetricTypeIfNotExist(m) checker.SetDefaultMetricTypeIfNotExist(m, dtype)
if err := checker.StaticCheck(m); err != nil { if err := checker.StaticCheck(m); err != nil {
panic(fmt.Sprintf("autoIndex.build not invalid, parameters not invalid, error: %s", err.Error())) panic(fmt.Sprintf("%s invalid, parameters invalid, error: %s", key, err.Error()))
} }
p.reset(m, mgr) p.reset(key, m, mgr)
} }
func (p *autoIndexConfig) reset(m map[string]string, mgr *config.Manager) { func (p *autoIndexConfig) reset(key string, m map[string]string, mgr *config.Manager) {
j := funcutil.MapToJSON(m) j := funcutil.MapToJSON(m)
mgr.SetConfig("autoIndex.params.build", string(j)) mgr.SetConfig(key, string(j))
} }

View File

@ -23,6 +23,7 @@ import (
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
"github.com/milvus-io/milvus/pkg/common" "github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/config" "github.com/milvus-io/milvus/pkg/config"
"github.com/milvus-io/milvus/pkg/util/indexparamcheck" "github.com/milvus-io/milvus/pkg/util/indexparamcheck"
@ -66,6 +67,56 @@ func TestAutoIndexParams_build(t *testing.T) {
assert.Equal(t, strconv.Itoa(map2["nlist"].(int)), CParams.AutoIndexConfig.IndexParams.GetAsJSONMap()["nlist"]) assert.Equal(t, strconv.Itoa(map2["nlist"].(int)), CParams.AutoIndexConfig.IndexParams.GetAsJSONMap()["nlist"])
}) })
t.Run("test parseSparseBuildParams success", func(t *testing.T) {
// Params := CParams.AutoIndexConfig
// buildParams := make([string]interface)
var err error
map1 := map[string]any{
IndexTypeKey: "SPARSE_INVERTED_INDEX",
"drop_ratio_build": 0.1,
}
var jsonStrBytes []byte
jsonStrBytes, err = json.Marshal(map1)
assert.NoError(t, err)
bt.Save(CParams.AutoIndexConfig.SparseIndexParams.Key, string(jsonStrBytes))
assert.Equal(t, "SPARSE_INVERTED_INDEX", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()[IndexTypeKey])
assert.Equal(t, "0.1", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()["drop_ratio_build"])
map2 := map[string]interface{}{
IndexTypeKey: "SPARSE_WAND",
"drop_ratio_build": 0.2,
}
jsonStrBytes, err = json.Marshal(map2)
assert.NoError(t, err)
bt.Save(CParams.AutoIndexConfig.SparseIndexParams.Key, string(jsonStrBytes))
assert.Equal(t, "SPARSE_WAND", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()[IndexTypeKey])
assert.Equal(t, "0.2", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()["drop_ratio_build"])
})
t.Run("test parseBinaryParams success", func(t *testing.T) {
// Params := CParams.AutoIndexConfig
// buildParams := make([string]interface)
var err error
map1 := map[string]any{
IndexTypeKey: "BIN_IVF_FLAT",
"nlist": 768,
}
var jsonStrBytes []byte
jsonStrBytes, err = json.Marshal(map1)
assert.NoError(t, err)
bt.Save(CParams.AutoIndexConfig.BinaryIndexParams.Key, string(jsonStrBytes))
assert.Equal(t, "BIN_IVF_FLAT", CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()[IndexTypeKey])
assert.Equal(t, strconv.Itoa(map1["nlist"].(int)), CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()["nlist"])
map2 := map[string]interface{}{
IndexTypeKey: "BIN_FLAT",
}
jsonStrBytes, err = json.Marshal(map2)
assert.NoError(t, err)
bt.Save(CParams.AutoIndexConfig.BinaryIndexParams.Key, string(jsonStrBytes))
assert.Equal(t, "BIN_FLAT", CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()[IndexTypeKey])
})
t.Run("test parsePrepareParams success", func(t *testing.T) { t.Run("test parsePrepareParams success", func(t *testing.T) {
var err error var err error
map1 := map[string]any{ map1 := map[string]any{
@ -90,7 +141,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.Panics(t, func() { assert.Panics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
}) })
}) })
@ -104,7 +155,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.Panics(t, func() { assert.Panics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
}) })
}) })
@ -118,7 +169,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.Panics(t, func() { assert.Panics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
}) })
}) })
@ -132,7 +183,58 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.NotPanics(t, func() { assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
assert.Equal(t, indexparamcheck.FloatVectorDefaultMetricType, metricType)
})
t.Run("normal case, binary vector", func(t *testing.T) {
mgr := config.NewManager()
mgr.SetConfig("autoIndex.params.binary.build", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT"}`)
p := &autoIndexConfig{
BinaryIndexParams: ParamItem{
Key: "autoIndex.params.binary.build",
},
}
p.BinaryIndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.BinaryIndexParams.Key, p.BinaryIndexParams.GetAsJSONMap(), schemapb.DataType_BinaryVector, mgr)
})
metricType, exist := p.BinaryIndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
assert.Equal(t, indexparamcheck.BinaryVectorDefaultMetricType, metricType)
})
t.Run("normal case, sparse vector", func(t *testing.T) {
mgr := config.NewManager()
mgr.SetConfig("autoIndex.params.sparse.build", `{"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}`)
p := &autoIndexConfig{
SparseIndexParams: ParamItem{
Key: "autoIndex.params.sparse.build",
},
}
p.SparseIndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.SparseIndexParams.Key, p.SparseIndexParams.GetAsJSONMap(), schemapb.DataType_SparseFloatVector, mgr)
})
metricType, exist := p.SparseIndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
assert.Equal(t, indexparamcheck.SparseFloatVectorDefaultMetricType, metricType)
})
t.Run("normal case, ivf flat", func(t *testing.T) {
mgr := config.NewManager()
mgr.SetConfig("autoIndex.params.build", `{"nlist": 30, "index_type": "IVF_FLAT"}`)
p := &autoIndexConfig{
IndexParams: ParamItem{
Key: "autoIndex.params.build",
},
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
}) })
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey] metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist) assert.True(t, exist)
@ -149,24 +251,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.NotPanics(t, func() { assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
})
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist)
assert.Equal(t, indexparamcheck.FloatVectorDefaultMetricType, metricType)
})
t.Run("normal case, ivf flat", func(t *testing.T) {
mgr := config.NewManager()
mgr.SetConfig("autoIndex.params.build", `{"nlist": 30, "index_type": "IVF_FLAT"}`)
p := &autoIndexConfig{
IndexParams: ParamItem{
Key: "autoIndex.params.build",
},
}
p.IndexParams.Init(mgr)
assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr)
}) })
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey] metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist) assert.True(t, exist)
@ -183,7 +268,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.NotPanics(t, func() { assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
}) })
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey] metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist) assert.True(t, exist)
@ -200,7 +285,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.NotPanics(t, func() { assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
}) })
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey] metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist) assert.True(t, exist)
@ -217,7 +302,7 @@ func Test_autoIndexConfig_panicIfNotValid(t *testing.T) {
} }
p.IndexParams.Init(mgr) p.IndexParams.Init(mgr)
assert.NotPanics(t, func() { assert.NotPanics(t, func() {
p.panicIfNotValidAndSetDefaultMetricType(mgr) p.panicIfNotValidAndSetDefaultMetricTypeHelper(p.IndexParams.Key, p.IndexParams.GetAsJSONMap(), schemapb.DataType_FloatVector, mgr)
}) })
metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey] metricType, exist := p.IndexParams.GetAsJSONMap()[common.MetricTypeKey]
assert.True(t, exist) assert.True(t, exist)