mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-03 04:19:18 +08:00
enhance: [cherry-pick] add autoindex mapping for binary/sparse datatype (#33625)
issue: #22837 pr: #33624 Signed-off-by: xianliang.li <xianliang.li@zilliz.com>
This commit is contained in:
parent
443197bdfc
commit
58a7111599
@ -12,7 +12,7 @@
|
|||||||
#-------------------------------------------------------------------------------
|
#-------------------------------------------------------------------------------
|
||||||
|
|
||||||
# Update KNOWHERE_VERSION for the first occurrence
|
# Update KNOWHERE_VERSION for the first occurrence
|
||||||
set( KNOWHERE_VERSION v2.3.4 )
|
set( KNOWHERE_VERSION v2.3.5 )
|
||||||
set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git")
|
set( GIT_REPOSITORY "https://github.com/zilliztech/knowhere.git")
|
||||||
message(STATUS "Knowhere repo: ${GIT_REPOSITORY}")
|
message(STATUS "Knowhere repo: ${GIT_REPOSITORY}")
|
||||||
message(STATUS "Knowhere version: ${KNOWHERE_VERSION}")
|
message(STATUS "Knowhere version: ${KNOWHERE_VERSION}")
|
||||||
|
@ -177,10 +177,22 @@ func (cit *createIndexTask) parseIndexParams() error {
|
|||||||
|
|
||||||
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
|
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
|
||||||
|
|
||||||
// override params by autoindex
|
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
|
||||||
|
// override float vector index params by autoindex
|
||||||
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() {
|
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() {
|
||||||
indexParamsMap[k] = v
|
indexParamsMap[k] = v
|
||||||
}
|
}
|
||||||
|
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
|
||||||
|
// override sparse float vector index params by autoindex
|
||||||
|
for k, v := range Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap() {
|
||||||
|
indexParamsMap[k] = v
|
||||||
|
}
|
||||||
|
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
|
||||||
|
// override binary vector index params by autoindex
|
||||||
|
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
|
||||||
|
indexParamsMap[k] = v
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if metricTypeExist {
|
if metricTypeExist {
|
||||||
// make the users' metric type first class citizen.
|
// make the users' metric type first class citizen.
|
||||||
|
@ -963,6 +963,95 @@ func Test_wrapUserIndexParams(t *testing.T) {
|
|||||||
assert.Equal(t, "L2", params[1].Value)
|
assert.Equal(t, "L2", params[1].Value)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func Test_parseIndexParams_AutoIndex_WithType(t *testing.T) {
|
||||||
|
paramtable.Init()
|
||||||
|
mgr := config.NewManager()
|
||||||
|
mgr.SetConfig("autoIndex.enable", "true")
|
||||||
|
Params.AutoIndexConfig.Enable.Init(mgr)
|
||||||
|
|
||||||
|
mgr.SetConfig("autoIndex.params.build", `{"M": 30,"efConstruction": 360,"index_type": "HNSW"}`)
|
||||||
|
mgr.SetConfig("autoIndex.params.sparsebuild", `{"drop_ratio_build": 0.2, "index_type": "SPARSE_INVERTED_INDEX"}`)
|
||||||
|
mgr.SetConfig("autoIndex.params.binarybuild", `{"nlist": 1024, "index_type": "BIN_IVF_FLAT"}`)
|
||||||
|
Params.AutoIndexConfig.IndexParams.Init(mgr)
|
||||||
|
Params.AutoIndexConfig.SparseIndexParams.Init(mgr)
|
||||||
|
Params.AutoIndexConfig.BinaryIndexParams.Init(mgr)
|
||||||
|
|
||||||
|
floatFieldSchema := &schemapb.FieldSchema{
|
||||||
|
DataType: schemapb.DataType_FloatVector,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.DimKey, Value: "128"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
sparseFloatFieldSchema := &schemapb.FieldSchema{
|
||||||
|
DataType: schemapb.DataType_SparseFloatVector,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.DimKey, Value: "64"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
binaryFieldSchema := &schemapb.FieldSchema{
|
||||||
|
DataType: schemapb.DataType_BinaryVector,
|
||||||
|
TypeParams: []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.DimKey, Value: "4096"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
t.Run("case 1, float vector parameters", func(t *testing.T) {
|
||||||
|
task := &createIndexTask{
|
||||||
|
fieldSchema: floatFieldSchema,
|
||||||
|
req: &milvuspb.CreateIndexRequest{
|
||||||
|
ExtraParams: []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.MetricTypeKey, Value: "L2"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
err := task.parseIndexParams()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.ElementsMatch(t, []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.IndexTypeKey, Value: "HNSW"},
|
||||||
|
{Key: common.MetricTypeKey, Value: "L2"},
|
||||||
|
{Key: "M", Value: "30"},
|
||||||
|
{Key: "efConstruction", Value: "360"},
|
||||||
|
}, task.newIndexParams)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("case 2, sparse vector parameters", func(t *testing.T) {
|
||||||
|
Params.AutoIndexConfig.IndexParams.Init(mgr)
|
||||||
|
task := &createIndexTask{
|
||||||
|
fieldSchema: sparseFloatFieldSchema,
|
||||||
|
req: &milvuspb.CreateIndexRequest{
|
||||||
|
ExtraParams: []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.MetricTypeKey, Value: "IP"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
err := task.parseIndexParams()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.ElementsMatch(t, []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.IndexTypeKey, Value: "SPARSE_INVERTED_INDEX"},
|
||||||
|
{Key: common.MetricTypeKey, Value: "IP"},
|
||||||
|
{Key: "drop_ratio_build", Value: "0.2"},
|
||||||
|
}, task.newIndexParams)
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("case 3, binary vector parameters", func(t *testing.T) {
|
||||||
|
task := &createIndexTask{
|
||||||
|
fieldSchema: binaryFieldSchema,
|
||||||
|
req: &milvuspb.CreateIndexRequest{
|
||||||
|
ExtraParams: []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.MetricTypeKey, Value: "JACCARD"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
err := task.parseIndexParams()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.ElementsMatch(t, []*commonpb.KeyValuePair{
|
||||||
|
{Key: common.IndexTypeKey, Value: "BIN_IVF_FLAT"},
|
||||||
|
{Key: common.MetricTypeKey, Value: "JACCARD"},
|
||||||
|
{Key: "nlist", Value: "1024"},
|
||||||
|
}, task.newIndexParams)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
func Test_parseIndexParams_AutoIndex(t *testing.T) {
|
func Test_parseIndexParams_AutoIndex(t *testing.T) {
|
||||||
paramtable.Init()
|
paramtable.Init()
|
||||||
mgr := config.NewManager()
|
mgr := config.NewManager()
|
||||||
|
@ -33,8 +33,8 @@ func (c hnswChecker) CheckTrain(params map[string]string) error {
|
|||||||
|
|
||||||
func (c hnswChecker) CheckValidDataType(dType schemapb.DataType) error {
|
func (c hnswChecker) CheckValidDataType(dType schemapb.DataType) error {
|
||||||
// TODO(SPARSE) we'll add sparse vector support in HNSW later in cardinal
|
// TODO(SPARSE) we'll add sparse vector support in HNSW later in cardinal
|
||||||
if !typeutil.IsDenseFloatVectorType(dType) {
|
if !(typeutil.IsDenseFloatVectorType(dType) || typeutil.IsBinaryVectorType(dType)) {
|
||||||
return fmt.Errorf("HNSW only support float vector data type")
|
return fmt.Errorf("HNSW only support float vector or bin data type")
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -158,7 +158,7 @@ func Test_hnswChecker_CheckValidDataType(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
dType: schemapb.DataType_BinaryVector,
|
dType: schemapb.DataType_BinaryVector,
|
||||||
errIsNil: false,
|
errIsNil: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -362,6 +362,10 @@ func AppendPrepareLoadParams(params *paramtable.ComponentParam, indexParams map[
|
|||||||
for k, v := range params.AutoIndexConfig.PrepareParams.GetAsJSONMap() {
|
for k, v := range params.AutoIndexConfig.PrepareParams.GetAsJSONMap() {
|
||||||
indexParams[k] = v
|
indexParams[k] = v
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for k, v := range params.AutoIndexConfig.LoadAdaptParams.GetAsJSONMap() {
|
||||||
|
indexParams[k] = v
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -583,7 +583,7 @@ func TestBigDataIndex_parse(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func TestAppendPrepareInfo_parse(t *testing.T) {
|
func TestAppendPrepareInfo_parse(t *testing.T) {
|
||||||
t.Run("parse prepare info", func(t *testing.T) {
|
t.Run("parse load info", func(t *testing.T) {
|
||||||
var params paramtable.ComponentParam
|
var params paramtable.ComponentParam
|
||||||
params.Init(paramtable.NewBaseTable(paramtable.SkipRemote(true)))
|
params.Init(paramtable.NewBaseTable(paramtable.SkipRemote(true)))
|
||||||
params.Save(params.AutoIndexConfig.Enable.Key, "true")
|
params.Save(params.AutoIndexConfig.Enable.Key, "true")
|
||||||
@ -593,9 +593,16 @@ func TestAppendPrepareInfo_parse(t *testing.T) {
|
|||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
params.Save(params.AutoIndexConfig.PrepareParams.Key, string(str))
|
params.Save(params.AutoIndexConfig.PrepareParams.Key, string(str))
|
||||||
|
|
||||||
|
mapString2 := make(map[string]string)
|
||||||
|
mapString2["key2"] = "value2"
|
||||||
|
str2, err2 := json.Marshal(mapString2)
|
||||||
|
assert.NoError(t, err2)
|
||||||
|
params.Save(params.AutoIndexConfig.LoadAdaptParams.Key, string(str2))
|
||||||
|
|
||||||
resultMapString := make(map[string]string)
|
resultMapString := make(map[string]string)
|
||||||
err = AppendPrepareLoadParams(&params, resultMapString)
|
err = AppendPrepareLoadParams(&params, resultMapString)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Equal(t, resultMapString["key1"], "value1")
|
assert.Equal(t, resultMapString["key1"], "value1")
|
||||||
|
assert.Equal(t, resultMapString["key2"], "value2")
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -32,7 +32,10 @@ type autoIndexConfig struct {
|
|||||||
EnableOptimize ParamItem `refreshable:"true"`
|
EnableOptimize ParamItem `refreshable:"true"`
|
||||||
|
|
||||||
IndexParams ParamItem `refreshable:"true"`
|
IndexParams ParamItem `refreshable:"true"`
|
||||||
|
SparseIndexParams ParamItem `refreshable:"true"`
|
||||||
|
BinaryIndexParams ParamItem `refreshable:"true"`
|
||||||
PrepareParams ParamItem `refreshable:"true"`
|
PrepareParams ParamItem `refreshable:"true"`
|
||||||
|
LoadAdaptParams ParamItem `refreshable:"true"`
|
||||||
ExtraParams ParamItem `refreshable:"true"`
|
ExtraParams ParamItem `refreshable:"true"`
|
||||||
IndexType ParamItem `refreshable:"true"`
|
IndexType ParamItem `refreshable:"true"`
|
||||||
AutoIndexTypeName ParamItem `refreshable:"true"`
|
AutoIndexTypeName ParamItem `refreshable:"true"`
|
||||||
@ -71,12 +74,34 @@ func (p *autoIndexConfig) init(base *BaseTable) {
|
|||||||
}
|
}
|
||||||
p.IndexParams.Init(base.mgr)
|
p.IndexParams.Init(base.mgr)
|
||||||
|
|
||||||
|
p.SparseIndexParams = ParamItem{
|
||||||
|
Key: "autoIndex.params.sparse.build",
|
||||||
|
Version: "2.4.5",
|
||||||
|
DefaultValue: `{"index_type": "SPARSE_INVERTED_INDEX", "metric_type": "IP"}`,
|
||||||
|
Export: true,
|
||||||
|
}
|
||||||
|
p.SparseIndexParams.Init(base.mgr)
|
||||||
|
|
||||||
|
p.BinaryIndexParams = ParamItem{
|
||||||
|
Key: "autoIndex.params.binary.build",
|
||||||
|
Version: "2.4.5",
|
||||||
|
DefaultValue: `{"nlist": 1024, "index_type": "BIN_IVF_FLAT", "metric_type": "JACCARD"}`,
|
||||||
|
Export: true,
|
||||||
|
}
|
||||||
|
p.BinaryIndexParams.Init(base.mgr)
|
||||||
|
|
||||||
p.PrepareParams = ParamItem{
|
p.PrepareParams = ParamItem{
|
||||||
Key: "autoIndex.params.prepare",
|
Key: "autoIndex.params.prepare",
|
||||||
Version: "2.3.2",
|
Version: "2.3.2",
|
||||||
}
|
}
|
||||||
p.PrepareParams.Init(base.mgr)
|
p.PrepareParams.Init(base.mgr)
|
||||||
|
|
||||||
|
p.LoadAdaptParams = ParamItem{
|
||||||
|
Key: "autoIndex.params.load",
|
||||||
|
Version: "2.4.5",
|
||||||
|
}
|
||||||
|
p.LoadAdaptParams.Init(base.mgr)
|
||||||
|
|
||||||
p.ExtraParams = ParamItem{
|
p.ExtraParams = ParamItem{
|
||||||
Key: "autoIndex.params.extra",
|
Key: "autoIndex.params.extra",
|
||||||
Version: "2.2.0",
|
Version: "2.2.0",
|
||||||
|
@ -66,6 +66,56 @@ func TestAutoIndexParams_build(t *testing.T) {
|
|||||||
assert.Equal(t, strconv.Itoa(map2["nlist"].(int)), CParams.AutoIndexConfig.IndexParams.GetAsJSONMap()["nlist"])
|
assert.Equal(t, strconv.Itoa(map2["nlist"].(int)), CParams.AutoIndexConfig.IndexParams.GetAsJSONMap()["nlist"])
|
||||||
})
|
})
|
||||||
|
|
||||||
|
t.Run("test parseSparseBuildParams success", func(t *testing.T) {
|
||||||
|
// Params := CParams.AutoIndexConfig
|
||||||
|
// buildParams := make([string]interface)
|
||||||
|
var err error
|
||||||
|
map1 := map[string]any{
|
||||||
|
IndexTypeKey: "SPARSE_INVERTED_INDEX",
|
||||||
|
"drop_ratio_build": 0.1,
|
||||||
|
}
|
||||||
|
var jsonStrBytes []byte
|
||||||
|
jsonStrBytes, err = json.Marshal(map1)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
bt.Save(CParams.AutoIndexConfig.SparseIndexParams.Key, string(jsonStrBytes))
|
||||||
|
assert.Equal(t, "SPARSE_INVERTED_INDEX", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()[IndexTypeKey])
|
||||||
|
assert.Equal(t, "0.1", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()["drop_ratio_build"])
|
||||||
|
|
||||||
|
map2 := map[string]interface{}{
|
||||||
|
IndexTypeKey: "SPARSE_WAND",
|
||||||
|
"drop_ratio_build": 0.2,
|
||||||
|
}
|
||||||
|
jsonStrBytes, err = json.Marshal(map2)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
bt.Save(CParams.AutoIndexConfig.SparseIndexParams.Key, string(jsonStrBytes))
|
||||||
|
assert.Equal(t, "SPARSE_WAND", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()[IndexTypeKey])
|
||||||
|
assert.Equal(t, "0.2", CParams.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()["drop_ratio_build"])
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("test parseBinaryParams success", func(t *testing.T) {
|
||||||
|
// Params := CParams.AutoIndexConfig
|
||||||
|
// buildParams := make([string]interface)
|
||||||
|
var err error
|
||||||
|
map1 := map[string]any{
|
||||||
|
IndexTypeKey: "BIN_IVF_FLAT",
|
||||||
|
"nlist": 768,
|
||||||
|
}
|
||||||
|
var jsonStrBytes []byte
|
||||||
|
jsonStrBytes, err = json.Marshal(map1)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
bt.Save(CParams.AutoIndexConfig.BinaryIndexParams.Key, string(jsonStrBytes))
|
||||||
|
assert.Equal(t, "BIN_IVF_FLAT", CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()[IndexTypeKey])
|
||||||
|
assert.Equal(t, strconv.Itoa(map1["nlist"].(int)), CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()["nlist"])
|
||||||
|
|
||||||
|
map2 := map[string]interface{}{
|
||||||
|
IndexTypeKey: "BIN_FLAT",
|
||||||
|
}
|
||||||
|
jsonStrBytes, err = json.Marshal(map2)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
bt.Save(CParams.AutoIndexConfig.BinaryIndexParams.Key, string(jsonStrBytes))
|
||||||
|
assert.Equal(t, "BIN_FLAT", CParams.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()[IndexTypeKey])
|
||||||
|
})
|
||||||
|
|
||||||
t.Run("test parsePrepareParams success", func(t *testing.T) {
|
t.Run("test parsePrepareParams success", func(t *testing.T) {
|
||||||
var err error
|
var err error
|
||||||
map1 := map[string]any{
|
map1 := map[string]any{
|
||||||
|
Loading…
Reference in New Issue
Block a user