2022-10-08 15:38:58 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package proxy
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
|
|
|
|
2023-02-26 11:31:49 +08:00
|
|
|
"github.com/cockroachdb/errors"
|
2022-10-08 15:38:58 +08:00
|
|
|
"go.uber.org/zap"
|
|
|
|
|
2023-06-09 01:28:37 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/milvuspb"
|
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/schemapb"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
2022-10-14 17:51:24 +08:00
|
|
|
"github.com/milvus-io/milvus/internal/types"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/common"
|
|
|
|
"github.com/milvus-io/milvus/pkg/log"
|
2023-10-20 14:26:09 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/mq/msgstream"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/commonpbutil"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/funcutil"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/indexparamcheck"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/indexparams"
|
2023-07-03 15:26:26 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/merr"
|
2024-03-14 05:32:54 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/metric"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/paramtable"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
2022-10-08 15:38:58 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
|
|
|
CreateIndexTaskName = "CreateIndexTask"
|
2023-12-21 18:07:24 +08:00
|
|
|
AlterIndexTaskName = "AlterIndexTask"
|
2022-10-08 15:38:58 +08:00
|
|
|
DescribeIndexTaskName = "DescribeIndexTask"
|
|
|
|
DropIndexTaskName = "DropIndexTask"
|
|
|
|
GetIndexStateTaskName = "GetIndexStateTask"
|
|
|
|
GetIndexBuildProgressTaskName = "GetIndexBuildProgressTask"
|
|
|
|
|
|
|
|
AutoIndexName = "AUTOINDEX"
|
|
|
|
DimKey = common.DimKey
|
2024-06-07 18:36:07 +08:00
|
|
|
IsSparseKey = common.IsSparseKey
|
2022-10-08 15:38:58 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
type createIndexTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2022-10-08 15:38:58 +08:00
|
|
|
Condition
|
2023-05-26 16:49:26 +08:00
|
|
|
req *milvuspb.CreateIndexRequest
|
|
|
|
ctx context.Context
|
2023-09-26 09:57:25 +08:00
|
|
|
rootCoord types.RootCoordClient
|
|
|
|
datacoord types.DataCoordClient
|
2023-05-26 16:49:26 +08:00
|
|
|
result *commonpb.Status
|
2022-10-08 15:38:58 +08:00
|
|
|
|
2023-10-20 14:26:09 +08:00
|
|
|
replicateMsgStream msgstream.MsgStream
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
isAutoIndex bool
|
|
|
|
newIndexParams []*commonpb.KeyValuePair
|
2023-03-26 22:15:59 +08:00
|
|
|
newTypeParams []*commonpb.KeyValuePair
|
2023-05-29 20:35:28 +08:00
|
|
|
newExtraParams []*commonpb.KeyValuePair
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
collectionID UniqueID
|
|
|
|
fieldSchema *schemapb.FieldSchema
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) TraceCtx() context.Context {
|
|
|
|
return cit.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) ID() UniqueID {
|
|
|
|
return cit.req.GetBase().GetMsgID()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) SetID(uid UniqueID) {
|
|
|
|
cit.req.GetBase().MsgID = uid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) Name() string {
|
|
|
|
return CreateIndexTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) Type() commonpb.MsgType {
|
|
|
|
return cit.req.GetBase().GetMsgType()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) BeginTs() Timestamp {
|
|
|
|
return cit.req.GetBase().GetTimestamp()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) EndTs() Timestamp {
|
|
|
|
return cit.req.GetBase().GetTimestamp()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) SetTs(ts Timestamp) {
|
|
|
|
cit.req.Base.Timestamp = ts
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) OnEnqueue() error {
|
2023-10-20 14:26:09 +08:00
|
|
|
if cit.req.Base == nil {
|
|
|
|
cit.req.Base = commonpbutil.NewMsgBase()
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-05-29 20:35:28 +08:00
|
|
|
func wrapUserIndexParams(metricType string) []*commonpb.KeyValuePair {
|
|
|
|
return []*commonpb.KeyValuePair{
|
|
|
|
{
|
|
|
|
Key: common.IndexTypeKey,
|
|
|
|
Value: AutoIndexName,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
Key: common.MetricTypeKey,
|
|
|
|
Value: metricType,
|
|
|
|
},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
func (cit *createIndexTask) parseIndexParams() error {
|
2023-05-29 20:35:28 +08:00
|
|
|
cit.newExtraParams = cit.req.GetExtraParams()
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
isVecIndex := typeutil.IsVectorType(cit.fieldSchema.DataType)
|
|
|
|
indexParamsMap := make(map[string]string)
|
|
|
|
|
|
|
|
for _, kv := range cit.req.GetExtraParams() {
|
|
|
|
if kv.Key == common.IndexParamsKey {
|
2022-12-07 18:01:19 +08:00
|
|
|
params, err := funcutil.JSONToMap(kv.Value)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
for k, v := range params {
|
|
|
|
indexParamsMap[k] = v
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
indexParamsMap[kv.Key] = kv.Value
|
|
|
|
}
|
|
|
|
}
|
2023-10-15 13:52:06 +08:00
|
|
|
|
2024-03-21 10:37:08 +08:00
|
|
|
specifyIndexType, exist := indexParamsMap[common.IndexTypeKey]
|
|
|
|
if exist && specifyIndexType != "" {
|
|
|
|
_, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(specifyIndexType)
|
|
|
|
if err != nil {
|
|
|
|
log.Ctx(cit.ctx).Warn("Failed to get index checker", zap.String(common.IndexTypeKey, specifyIndexType))
|
|
|
|
return merr.WrapErrParameterInvalid("valid index", fmt.Sprintf("invalid index type: %s", specifyIndexType))
|
|
|
|
}
|
|
|
|
}
|
2023-10-15 13:52:06 +08:00
|
|
|
|
2024-03-21 10:37:08 +08:00
|
|
|
if !isVecIndex {
|
|
|
|
specifyIndexType, exist := indexParamsMap[common.IndexTypeKey]
|
|
|
|
if Params.AutoIndexConfig.ScalarAutoIndexEnable.GetAsBool() || specifyIndexType == AutoIndexName || !exist {
|
|
|
|
if typeutil.IsArithmetic(cit.fieldSchema.DataType) {
|
|
|
|
indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarNumericIndexType.GetValue()
|
|
|
|
} else if typeutil.IsStringType(cit.fieldSchema.DataType) {
|
|
|
|
indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarVarcharIndexType.GetValue()
|
|
|
|
} else if typeutil.IsBoolType(cit.fieldSchema.DataType) {
|
|
|
|
indexParamsMap[common.IndexTypeKey] = Params.AutoIndexConfig.ScalarBoolIndexType.GetValue()
|
|
|
|
} else {
|
|
|
|
return merr.WrapErrParameterInvalid("supported field",
|
|
|
|
fmt.Sprintf("create auto index on %s field is not supported", cit.fieldSchema.DataType.String()))
|
feat: support inverted index (#28783)
issue: https://github.com/milvus-io/milvus/issues/27704
Add inverted index for some data types in Milvus. This index type can
save a lot of memory compared to loading all data into RAM and speed up
the term query and range query.
Supported: `INT8`, `INT16`, `INT32`, `INT64`, `FLOAT`, `DOUBLE`, `BOOL`
and `VARCHAR`.
Not supported: `ARRAY` and `JSON`.
Note:
- The inverted index for `VARCHAR` is not designed to serve full-text
search now. We will treat every row as a whole keyword instead of
tokenizing it into multiple terms.
- The inverted index don't support retrieval well, so if you create
inverted index for field, those operations which depend on the raw data
will fallback to use chunk storage, which will bring some performance
loss. For example, comparisons between two columns and retrieval of
output fields.
The inverted index is very easy to be used.
Taking below collection as an example:
```python
fields = [
FieldSchema(name="pk", dtype=DataType.VARCHAR, is_primary=True, auto_id=False, max_length=100),
FieldSchema(name="int8", dtype=DataType.INT8),
FieldSchema(name="int16", dtype=DataType.INT16),
FieldSchema(name="int32", dtype=DataType.INT32),
FieldSchema(name="int64", dtype=DataType.INT64),
FieldSchema(name="float", dtype=DataType.FLOAT),
FieldSchema(name="double", dtype=DataType.DOUBLE),
FieldSchema(name="bool", dtype=DataType.BOOL),
FieldSchema(name="varchar", dtype=DataType.VARCHAR, max_length=1000),
FieldSchema(name="random", dtype=DataType.DOUBLE),
FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=dim),
]
schema = CollectionSchema(fields)
collection = Collection("demo", schema)
```
Then we can simply create inverted index for field via:
```python
index_type = "INVERTED"
collection.create_index("int8", {"index_type": index_type})
collection.create_index("int16", {"index_type": index_type})
collection.create_index("int32", {"index_type": index_type})
collection.create_index("int64", {"index_type": index_type})
collection.create_index("float", {"index_type": index_type})
collection.create_index("double", {"index_type": index_type})
collection.create_index("bool", {"index_type": index_type})
collection.create_index("varchar", {"index_type": index_type})
```
Then, term query and range query on the field can be speed up
automatically by the inverted index:
```python
result = collection.query(expr='int64 in [1, 2, 3]', output_fields=["pk"])
result = collection.query(expr='int64 < 5', output_fields=["pk"])
result = collection.query(expr='int64 > 2997', output_fields=["pk"])
result = collection.query(expr='1 < int64 < 5', output_fields=["pk"])
```
---------
Signed-off-by: longjiquan <jiquan.long@zilliz.com>
2023-12-31 19:50:47 +08:00
|
|
|
}
|
2023-07-03 15:26:26 +08:00
|
|
|
}
|
2024-03-14 05:32:54 +08:00
|
|
|
} else {
|
2022-10-08 15:38:58 +08:00
|
|
|
specifyIndexType, exist := indexParamsMap[common.IndexTypeKey]
|
2023-05-29 20:35:28 +08:00
|
|
|
if Params.AutoIndexConfig.Enable.GetAsBool() { // `enable` only for cloud instance.
|
2023-04-30 11:50:40 +08:00
|
|
|
log.Info("create index trigger AutoIndex",
|
|
|
|
zap.String("original type", specifyIndexType),
|
|
|
|
zap.String("final type", Params.AutoIndexConfig.AutoIndexTypeName.GetValue()))
|
2023-05-29 20:35:28 +08:00
|
|
|
|
|
|
|
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
|
|
|
|
|
2024-06-06 10:33:52 +08:00
|
|
|
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
|
|
|
|
// override float vector index params by autoindex
|
|
|
|
for k, v := range Params.AutoIndexConfig.IndexParams.GetAsJSONMap() {
|
|
|
|
indexParamsMap[k] = v
|
|
|
|
}
|
|
|
|
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
|
|
|
|
// override sparse float vector index params by autoindex
|
|
|
|
for k, v := range Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap() {
|
|
|
|
indexParamsMap[k] = v
|
|
|
|
}
|
|
|
|
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
|
|
|
|
// override binary vector index params by autoindex
|
|
|
|
for k, v := range Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap() {
|
|
|
|
indexParamsMap[k] = v
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
2023-05-29 20:35:28 +08:00
|
|
|
|
|
|
|
if metricTypeExist {
|
|
|
|
// make the users' metric type first class citizen.
|
|
|
|
indexParamsMap[common.MetricTypeKey] = metricType
|
|
|
|
}
|
|
|
|
} else { // behavior change after 2.2.9, adapt autoindex logic here.
|
2024-06-14 23:26:00 +08:00
|
|
|
useAutoIndex := func(autoIndexConfig map[string]string) {
|
2023-05-29 20:35:28 +08:00
|
|
|
fields := make([]zap.Field, 0, len(autoIndexConfig))
|
|
|
|
for k, v := range autoIndexConfig {
|
|
|
|
indexParamsMap[k] = v
|
|
|
|
fields = append(fields, zap.String(k, v))
|
|
|
|
}
|
|
|
|
log.Ctx(cit.ctx).Info("AutoIndex triggered", fields...)
|
|
|
|
}
|
|
|
|
|
2024-06-14 23:26:00 +08:00
|
|
|
handle := func(numberParams int, autoIndexConfig map[string]string) error {
|
2023-05-29 20:35:28 +08:00
|
|
|
// empty case.
|
|
|
|
if len(indexParamsMap) == numberParams {
|
|
|
|
// though we already know there must be metric type, how to make this safer to avoid crash?
|
|
|
|
metricType := autoIndexConfig[common.MetricTypeKey]
|
|
|
|
cit.newExtraParams = wrapUserIndexParams(metricType)
|
2024-06-14 23:26:00 +08:00
|
|
|
useAutoIndex(autoIndexConfig)
|
2023-05-29 20:35:28 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
|
|
|
|
|
|
|
|
if len(indexParamsMap) > numberParams+1 {
|
|
|
|
return fmt.Errorf("only metric type can be passed when use AutoIndex")
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(indexParamsMap) == numberParams+1 {
|
|
|
|
if !metricTypeExist {
|
|
|
|
return fmt.Errorf("only metric type can be passed when use AutoIndex")
|
|
|
|
}
|
|
|
|
|
|
|
|
// only metric type is passed.
|
|
|
|
cit.newExtraParams = wrapUserIndexParams(metricType)
|
2024-06-14 23:26:00 +08:00
|
|
|
useAutoIndex(autoIndexConfig)
|
2023-05-29 20:35:28 +08:00
|
|
|
// make the users' metric type first class citizen.
|
|
|
|
indexParamsMap[common.MetricTypeKey] = metricType
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2024-06-14 23:26:00 +08:00
|
|
|
var config map[string]string
|
|
|
|
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
|
|
|
|
// override float vector index params by autoindex
|
|
|
|
config = Params.AutoIndexConfig.IndexParams.GetAsJSONMap()
|
|
|
|
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
|
|
|
|
// override sparse float vector index params by autoindex
|
|
|
|
config = Params.AutoIndexConfig.SparseIndexParams.GetAsJSONMap()
|
|
|
|
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
|
|
|
|
// override binary vector index params by autoindex
|
|
|
|
config = Params.AutoIndexConfig.BinaryIndexParams.GetAsJSONMap()
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
if !exist {
|
2024-06-14 23:26:00 +08:00
|
|
|
if err := handle(0, config); err != nil {
|
2023-05-29 20:35:28 +08:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else if specifyIndexType == AutoIndexName {
|
2024-06-14 23:26:00 +08:00
|
|
|
if err := handle(1, config); err != nil {
|
2023-05-29 20:35:28 +08:00
|
|
|
return err
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
}
|
2022-10-14 17:51:24 +08:00
|
|
|
|
|
|
|
indexType, exist := indexParamsMap[common.IndexTypeKey]
|
|
|
|
if !exist {
|
|
|
|
return fmt.Errorf("IndexType not specified")
|
|
|
|
}
|
|
|
|
if indexType == indexparamcheck.IndexDISKANN {
|
2022-11-04 14:25:38 +08:00
|
|
|
err := indexparams.FillDiskIndexParams(Params, indexParamsMap)
|
2022-10-14 17:51:24 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2024-06-14 23:26:00 +08:00
|
|
|
metricType, metricTypeExist := indexParamsMap[common.MetricTypeKey]
|
|
|
|
if !metricTypeExist {
|
|
|
|
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "metric type not set for vector index")
|
|
|
|
}
|
|
|
|
if typeutil.IsDenseFloatVectorType(cit.fieldSchema.DataType) {
|
|
|
|
if !funcutil.SliceContain(indexparamcheck.FloatVectorMetrics, metricType) {
|
|
|
|
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "float vector index does not support metric type: "+metricType)
|
|
|
|
}
|
|
|
|
} else if typeutil.IsSparseFloatVectorType(cit.fieldSchema.DataType) {
|
|
|
|
if metricType != metric.IP {
|
|
|
|
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "only IP is the supported metric type for sparse index")
|
|
|
|
}
|
|
|
|
} else if typeutil.IsBinaryVectorType(cit.fieldSchema.DataType) {
|
|
|
|
if !funcutil.SliceContain(indexparamcheck.BinaryVectorMetrics, metricType) {
|
|
|
|
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", "binary vector index does not support metric type: "+metricType)
|
2024-03-14 05:32:54 +08:00
|
|
|
}
|
|
|
|
}
|
2024-03-21 10:37:08 +08:00
|
|
|
}
|
2022-10-14 17:51:24 +08:00
|
|
|
|
2024-03-21 10:37:08 +08:00
|
|
|
err := checkTrain(cit.fieldSchema, indexParamsMap)
|
|
|
|
if err != nil {
|
|
|
|
return merr.WrapErrParameterInvalid("valid index params", "invalid index params", err.Error())
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
2024-03-21 10:37:08 +08:00
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
typeParams := cit.fieldSchema.GetTypeParams()
|
2023-03-26 22:15:59 +08:00
|
|
|
typeParamsMap := make(map[string]string)
|
2022-10-08 15:38:58 +08:00
|
|
|
for _, pair := range typeParams {
|
2023-03-26 22:15:59 +08:00
|
|
|
typeParamsMap[pair.Key] = pair.Value
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
for k, v := range indexParamsMap {
|
2023-09-21 09:45:27 +08:00
|
|
|
// Currently, it is required that type_params and index_params do not have same keys.
|
2023-05-16 17:41:22 +08:00
|
|
|
if k == DimKey || k == common.MaxLengthKey {
|
2023-03-26 22:15:59 +08:00
|
|
|
delete(indexParamsMap, k)
|
2022-10-08 15:38:58 +08:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
cit.newIndexParams = append(cit.newIndexParams, &commonpb.KeyValuePair{Key: k, Value: v})
|
|
|
|
}
|
|
|
|
|
2023-03-26 22:15:59 +08:00
|
|
|
for k, v := range typeParamsMap {
|
|
|
|
if _, ok := indexParamsMap[k]; ok {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
cit.newTypeParams = append(cit.newTypeParams, &commonpb.KeyValuePair{Key: k, Value: v})
|
|
|
|
}
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) getIndexedField(ctx context.Context) (*schemapb.FieldSchema, error) {
|
2023-06-25 17:20:43 +08:00
|
|
|
schema, err := globalMetaCache.GetCollectionSchema(ctx, cit.req.GetDbName(), cit.req.GetCollectionName())
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to get collection schema", zap.Error(err))
|
|
|
|
return nil, fmt.Errorf("failed to get collection schema: %s", err)
|
|
|
|
}
|
2024-01-04 17:28:46 +08:00
|
|
|
schemaHelper, err := typeutil.CreateSchemaHelper(schema.CollectionSchema)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to parse collection schema", zap.Error(err))
|
|
|
|
return nil, fmt.Errorf("failed to parse collection schema: %s", err)
|
|
|
|
}
|
|
|
|
field, err := schemaHelper.GetFieldFromName(cit.req.GetFieldName())
|
|
|
|
if err != nil {
|
|
|
|
log.Error("create index on non-exist field", zap.Error(err))
|
|
|
|
return nil, fmt.Errorf("cannot create index on non-exist field: %s", cit.req.GetFieldName())
|
|
|
|
}
|
|
|
|
return field, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func fillDimension(field *schemapb.FieldSchema, indexParams map[string]string) error {
|
2024-04-07 14:27:22 +08:00
|
|
|
if !typeutil.IsVectorType(field.GetDataType()) {
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
params := make([]*commonpb.KeyValuePair, 0, len(field.GetTypeParams())+len(field.GetIndexParams()))
|
|
|
|
params = append(params, field.GetTypeParams()...)
|
|
|
|
params = append(params, field.GetIndexParams()...)
|
|
|
|
dimensionInSchema, err := funcutil.GetAttrByKeyFromRepeatedKV(DimKey, params)
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("dimension not found in schema")
|
|
|
|
}
|
|
|
|
dimension, exist := indexParams[DimKey]
|
|
|
|
if exist {
|
|
|
|
if dimensionInSchema != dimension {
|
|
|
|
return fmt.Errorf("dimension mismatch, dimension in schema: %s, dimension: %s", dimensionInSchema, dimension)
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
indexParams[DimKey] = dimensionInSchema
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func checkTrain(field *schemapb.FieldSchema, indexParams map[string]string) error {
|
|
|
|
indexType := indexParams[common.IndexTypeKey]
|
2024-06-14 19:45:58 +08:00
|
|
|
if typeutil.IsVectorType(field.DataType) && indexType != indexparamcheck.AutoIndex {
|
|
|
|
exist := indexparamcheck.CheckVecIndexWithDataTypeExist(indexType, field.DataType)
|
|
|
|
if !exist {
|
|
|
|
return fmt.Errorf("data type %d can't build with this index %s", field.DataType, indexType)
|
|
|
|
}
|
|
|
|
}
|
2023-05-06 10:40:39 +08:00
|
|
|
checker, err := indexparamcheck.GetIndexCheckerMgrInstance().GetChecker(indexType)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
2023-05-06 10:40:39 +08:00
|
|
|
log.Warn("Failed to get index checker", zap.String(common.IndexTypeKey, indexType))
|
2022-10-08 15:38:58 +08:00
|
|
|
return fmt.Errorf("invalid index type: %s", indexType)
|
|
|
|
}
|
|
|
|
|
2024-06-07 18:36:07 +08:00
|
|
|
isSparse := typeutil.IsSparseFloatVectorType(field.DataType)
|
|
|
|
|
|
|
|
if !isSparse {
|
2024-03-14 05:32:54 +08:00
|
|
|
if err := fillDimension(field, indexParams); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2024-06-07 18:36:07 +08:00
|
|
|
} else {
|
|
|
|
// used only for checker, should be deleted after checking
|
|
|
|
indexParams[IsSparseKey] = "true"
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
2023-05-06 10:40:39 +08:00
|
|
|
if err := checker.CheckValidDataType(field.GetDataType()); err != nil {
|
|
|
|
log.Info("create index with invalid data type", zap.Error(err), zap.String("data_type", field.GetDataType().String()))
|
|
|
|
return err
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
2023-05-06 10:40:39 +08:00
|
|
|
if err := checker.CheckTrain(indexParams); err != nil {
|
|
|
|
log.Info("create index with invalid parameters", zap.Error(err))
|
|
|
|
return err
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
2024-06-07 18:36:07 +08:00
|
|
|
if isSparse {
|
|
|
|
delete(indexParams, IsSparseKey)
|
|
|
|
}
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) PreExecute(ctx context.Context) error {
|
|
|
|
cit.req.Base.MsgType = commonpb.MsgType_CreateIndex
|
2022-11-04 14:25:38 +08:00
|
|
|
cit.req.Base.SourceID = paramtable.GetNodeID()
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
collName := cit.req.GetCollectionName()
|
|
|
|
|
2023-06-25 17:20:43 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(ctx, cit.req.GetDbName(), collName)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
cit.collectionID = collID
|
|
|
|
|
2022-10-16 21:05:25 +08:00
|
|
|
if err = validateIndexName(cit.req.GetIndexName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
field, err := cit.getIndexedField(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
cit.fieldSchema = field
|
|
|
|
// check index param, not accurate, only some static rules
|
2022-10-27 13:05:31 +08:00
|
|
|
err = cit.parseIndexParams()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) Execute(ctx context.Context) error {
|
2023-07-14 15:56:31 +08:00
|
|
|
log.Ctx(ctx).Info("proxy create index", zap.Int64("collectionID", cit.collectionID), zap.Int64("fieldID", cit.fieldSchema.GetFieldID()),
|
2022-10-08 15:38:58 +08:00
|
|
|
zap.String("indexName", cit.req.GetIndexName()), zap.Any("typeParams", cit.fieldSchema.GetTypeParams()),
|
2023-05-29 20:35:28 +08:00
|
|
|
zap.Any("indexParams", cit.req.GetExtraParams()),
|
|
|
|
zap.Any("newExtraParams", cit.newExtraParams),
|
|
|
|
)
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
var err error
|
2023-01-11 14:35:40 +08:00
|
|
|
req := &indexpb.CreateIndexRequest{
|
2022-10-08 15:38:58 +08:00
|
|
|
CollectionID: cit.collectionID,
|
|
|
|
FieldID: cit.fieldSchema.GetFieldID(),
|
|
|
|
IndexName: cit.req.GetIndexName(),
|
2023-04-09 16:18:29 +08:00
|
|
|
TypeParams: cit.newTypeParams,
|
2022-10-08 15:38:58 +08:00
|
|
|
IndexParams: cit.newIndexParams,
|
|
|
|
IsAutoIndex: cit.isAutoIndex,
|
2023-05-29 20:35:28 +08:00
|
|
|
UserIndexParams: cit.newExtraParams,
|
2022-10-08 15:38:58 +08:00
|
|
|
Timestamp: cit.BeginTs(),
|
|
|
|
}
|
2023-01-04 19:37:36 +08:00
|
|
|
cit.result, err = cit.datacoord.CreateIndex(ctx, req)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if cit.result.ErrorCode != commonpb.ErrorCode_Success {
|
|
|
|
return errors.New(cit.result.Reason)
|
|
|
|
}
|
2023-10-20 14:26:09 +08:00
|
|
|
SendReplicateMessagePack(ctx, cit.replicateMsgStream, cit.req)
|
|
|
|
return nil
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (cit *createIndexTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-12-21 18:07:24 +08:00
|
|
|
type alterIndexTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2023-12-21 18:07:24 +08:00
|
|
|
Condition
|
|
|
|
req *milvuspb.AlterIndexRequest
|
|
|
|
ctx context.Context
|
|
|
|
datacoord types.DataCoordClient
|
|
|
|
querycoord types.QueryCoordClient
|
|
|
|
result *commonpb.Status
|
|
|
|
|
|
|
|
replicateMsgStream msgstream.MsgStream
|
|
|
|
|
|
|
|
collectionID UniqueID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) TraceCtx() context.Context {
|
|
|
|
return t.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) ID() UniqueID {
|
|
|
|
return t.req.GetBase().GetMsgID()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) SetID(uid UniqueID) {
|
|
|
|
t.req.GetBase().MsgID = uid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) Name() string {
|
|
|
|
return CreateIndexTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) Type() commonpb.MsgType {
|
|
|
|
return t.req.GetBase().GetMsgType()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) BeginTs() Timestamp {
|
|
|
|
return t.req.GetBase().GetTimestamp()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) EndTs() Timestamp {
|
|
|
|
return t.req.GetBase().GetTimestamp()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) SetTs(ts Timestamp) {
|
|
|
|
t.req.Base.Timestamp = ts
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) OnEnqueue() error {
|
|
|
|
if t.req.Base == nil {
|
|
|
|
t.req.Base = commonpbutil.NewMsgBase()
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) PreExecute(ctx context.Context) error {
|
|
|
|
t.req.Base.MsgType = commonpb.MsgType_AlterIndex
|
|
|
|
t.req.Base.SourceID = paramtable.GetNodeID()
|
|
|
|
|
|
|
|
for _, param := range t.req.GetExtraParams() {
|
|
|
|
if !indexparams.IsConfigableIndexParam(param.GetKey()) {
|
|
|
|
return merr.WrapErrParameterInvalidMsg("%s is not configable index param", param.GetKey())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
collName := t.req.GetCollectionName()
|
|
|
|
|
|
|
|
collection, err := globalMetaCache.GetCollectionID(ctx, t.req.GetDbName(), collName)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
t.collectionID = collection
|
|
|
|
|
2024-01-17 16:40:54 +08:00
|
|
|
if len(t.req.GetIndexName()) == 0 {
|
|
|
|
return merr.WrapErrParameterInvalidMsg("index name is empty")
|
|
|
|
}
|
|
|
|
|
2023-12-21 18:07:24 +08:00
|
|
|
if err = validateIndexName(t.req.GetIndexName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
loaded, err := isCollectionLoaded(ctx, t.querycoord, collection)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if loaded {
|
|
|
|
return merr.WrapErrCollectionLoaded(collName, "can't alter index on loaded collection, please release the collection first")
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) Execute(ctx context.Context) error {
|
|
|
|
log := log.Ctx(ctx).With(
|
|
|
|
zap.String("collection", t.req.GetCollectionName()),
|
|
|
|
zap.String("indexName", t.req.GetIndexName()),
|
|
|
|
zap.Any("params", t.req.GetExtraParams()),
|
|
|
|
)
|
|
|
|
|
|
|
|
log.Info("alter index")
|
|
|
|
|
|
|
|
var err error
|
|
|
|
req := &indexpb.AlterIndexRequest{
|
|
|
|
CollectionID: t.collectionID,
|
|
|
|
IndexName: t.req.GetIndexName(),
|
|
|
|
Params: t.req.GetExtraParams(),
|
|
|
|
}
|
|
|
|
t.result, err = t.datacoord.AlterIndex(ctx, req)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if t.result.ErrorCode != commonpb.ErrorCode_Success {
|
|
|
|
return errors.New(t.result.Reason)
|
|
|
|
}
|
|
|
|
SendReplicateMessagePack(ctx, t.replicateMsgStream, t.req)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (t *alterIndexTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
type describeIndexTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2022-10-08 15:38:58 +08:00
|
|
|
Condition
|
|
|
|
*milvuspb.DescribeIndexRequest
|
2023-01-04 19:37:36 +08:00
|
|
|
ctx context.Context
|
2023-09-26 09:57:25 +08:00
|
|
|
datacoord types.DataCoordClient
|
2023-01-04 19:37:36 +08:00
|
|
|
result *milvuspb.DescribeIndexResponse
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
collectionID UniqueID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) TraceCtx() context.Context {
|
|
|
|
return dit.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) ID() UniqueID {
|
|
|
|
return dit.Base.MsgID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) SetID(uid UniqueID) {
|
|
|
|
dit.Base.MsgID = uid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) Name() string {
|
|
|
|
return DescribeIndexTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) Type() commonpb.MsgType {
|
|
|
|
return dit.Base.MsgType
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) BeginTs() Timestamp {
|
|
|
|
return dit.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) EndTs() Timestamp {
|
|
|
|
return dit.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) SetTs(ts Timestamp) {
|
|
|
|
dit.Base.Timestamp = ts
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) OnEnqueue() error {
|
2022-10-19 10:01:26 +08:00
|
|
|
dit.Base = commonpbutil.NewMsgBase()
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) PreExecute(ctx context.Context) error {
|
|
|
|
dit.Base.MsgType = commonpb.MsgType_DescribeIndex
|
2022-11-04 14:25:38 +08:00
|
|
|
dit.Base.SourceID = paramtable.GetNodeID()
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
if err := validateCollectionName(dit.CollectionName); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-06-25 17:20:43 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(ctx, dit.GetDbName(), dit.CollectionName)
|
2022-10-25 11:29:30 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
dit.collectionID = collID
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) Execute(ctx context.Context) error {
|
2023-06-25 17:20:43 +08:00
|
|
|
schema, err := globalMetaCache.GetCollectionSchema(ctx, dit.GetDbName(), dit.GetCollectionName())
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to get collection schema", zap.Error(err))
|
|
|
|
return fmt.Errorf("failed to get collection schema: %s", err)
|
|
|
|
}
|
2024-01-04 17:28:46 +08:00
|
|
|
schemaHelper, err := typeutil.CreateSchemaHelper(schema.CollectionSchema)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to parse collection schema", zap.Error(err))
|
|
|
|
return fmt.Errorf("failed to parse collection schema: %s", err)
|
|
|
|
}
|
|
|
|
|
2023-07-25 10:05:00 +08:00
|
|
|
resp, err := dit.datacoord.DescribeIndex(ctx, &indexpb.DescribeIndexRequest{CollectionID: dit.collectionID, IndexName: dit.IndexName, Timestamp: dit.Timestamp})
|
2023-10-08 21:23:32 +08:00
|
|
|
if err != nil {
|
2022-10-08 15:38:58 +08:00
|
|
|
return err
|
|
|
|
}
|
2023-10-08 21:23:32 +08:00
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
dit.result = &milvuspb.DescribeIndexResponse{}
|
|
|
|
dit.result.Status = resp.GetStatus()
|
2023-10-08 21:23:32 +08:00
|
|
|
err = merr.Error(resp.GetStatus())
|
|
|
|
if err != nil {
|
|
|
|
if errors.Is(err, merr.ErrIndexNotFound) && len(dit.GetIndexName()) == 0 {
|
|
|
|
err = merr.WrapErrIndexNotFoundForCollection(dit.GetCollectionName())
|
|
|
|
dit.result.Status = merr.Status(err)
|
|
|
|
}
|
|
|
|
return err
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
for _, indexInfo := range resp.IndexInfos {
|
|
|
|
field, err := schemaHelper.GetFieldFromID(indexInfo.FieldID)
|
|
|
|
if err != nil {
|
|
|
|
log.Error("failed to get collection field", zap.Error(err))
|
|
|
|
return fmt.Errorf("failed to get collection field: %d", indexInfo.FieldID)
|
|
|
|
}
|
|
|
|
params := indexInfo.GetUserIndexParams()
|
|
|
|
if params == nil {
|
2023-06-16 14:30:40 +08:00
|
|
|
metricType, err := funcutil.GetAttrByKeyFromRepeatedKV(MetricTypeKey, indexInfo.GetIndexParams())
|
|
|
|
if err == nil {
|
|
|
|
params = wrapUserIndexParams(metricType)
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
desc := &milvuspb.IndexDescription{
|
|
|
|
IndexName: indexInfo.GetIndexName(),
|
|
|
|
IndexID: indexInfo.GetIndexID(),
|
|
|
|
FieldName: field.Name,
|
|
|
|
Params: params,
|
|
|
|
IndexedRows: indexInfo.GetIndexedRows(),
|
|
|
|
TotalRows: indexInfo.GetTotalRows(),
|
2023-06-01 18:14:32 +08:00
|
|
|
PendingIndexRows: indexInfo.GetPendingIndexRows(),
|
2022-10-08 15:38:58 +08:00
|
|
|
State: indexInfo.GetState(),
|
|
|
|
IndexStateFailReason: indexInfo.GetIndexStateFailReason(),
|
|
|
|
}
|
|
|
|
dit.result.IndexDescriptions = append(dit.result.IndexDescriptions, desc)
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *describeIndexTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-05-06 10:34:39 +08:00
|
|
|
type getIndexStatisticsTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2023-05-06 10:34:39 +08:00
|
|
|
Condition
|
|
|
|
*milvuspb.GetIndexStatisticsRequest
|
|
|
|
ctx context.Context
|
2023-09-26 09:57:25 +08:00
|
|
|
datacoord types.DataCoordClient
|
2023-05-06 10:34:39 +08:00
|
|
|
result *milvuspb.GetIndexStatisticsResponse
|
|
|
|
|
|
|
|
nodeID int64
|
|
|
|
collectionID UniqueID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) TraceCtx() context.Context {
|
|
|
|
return dit.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) ID() UniqueID {
|
|
|
|
return dit.Base.MsgID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) SetID(uid UniqueID) {
|
|
|
|
dit.Base.MsgID = uid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) Name() string {
|
|
|
|
return DescribeIndexTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) Type() commonpb.MsgType {
|
|
|
|
return dit.Base.MsgType
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) BeginTs() Timestamp {
|
|
|
|
return dit.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) EndTs() Timestamp {
|
|
|
|
return dit.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) SetTs(ts Timestamp) {
|
|
|
|
dit.Base.Timestamp = ts
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) OnEnqueue() error {
|
|
|
|
dit.Base = commonpbutil.NewMsgBase()
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) PreExecute(ctx context.Context) error {
|
|
|
|
dit.Base.MsgType = commonpb.MsgType_GetIndexStatistics
|
|
|
|
dit.Base.SourceID = dit.nodeID
|
|
|
|
|
|
|
|
if err := validateCollectionName(dit.CollectionName); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-06-25 17:20:43 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(ctx, dit.GetDbName(), dit.CollectionName)
|
2023-05-06 10:34:39 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dit.collectionID = collID
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) Execute(ctx context.Context) error {
|
2023-06-25 17:20:43 +08:00
|
|
|
schema, err := globalMetaCache.GetCollectionSchema(ctx, dit.GetDbName(), dit.GetCollectionName())
|
2023-05-06 10:34:39 +08:00
|
|
|
if err != nil {
|
2023-05-08 10:02:39 +08:00
|
|
|
log.Error("failed to get collection schema", zap.String("collection_name", dit.GetCollectionName()), zap.Error(err))
|
|
|
|
return fmt.Errorf("failed to get collection schema: %s", dit.GetCollectionName())
|
2023-05-06 10:34:39 +08:00
|
|
|
}
|
2024-01-04 17:28:46 +08:00
|
|
|
schemaHelper, err := typeutil.CreateSchemaHelper(schema.CollectionSchema)
|
2023-05-06 10:34:39 +08:00
|
|
|
if err != nil {
|
2023-05-08 10:02:39 +08:00
|
|
|
log.Error("failed to parse collection schema", zap.String("collection_name", schema.GetName()), zap.Error(err))
|
|
|
|
return fmt.Errorf("failed to parse collection schema: %s", dit.GetCollectionName())
|
2023-05-06 10:34:39 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := dit.datacoord.GetIndexStatistics(ctx, &indexpb.GetIndexStatisticsRequest{
|
2023-09-21 09:45:27 +08:00
|
|
|
CollectionID: dit.collectionID, IndexName: dit.IndexName,
|
|
|
|
})
|
2023-05-06 10:34:39 +08:00
|
|
|
if err != nil || resp == nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dit.result = &milvuspb.GetIndexStatisticsResponse{}
|
|
|
|
dit.result.Status = resp.GetStatus()
|
2023-09-12 16:07:18 +08:00
|
|
|
if dit.result.GetStatus().GetErrorCode() != commonpb.ErrorCode_Success {
|
2023-10-07 11:29:32 +08:00
|
|
|
return merr.Error(dit.result.GetStatus())
|
2023-05-06 10:34:39 +08:00
|
|
|
}
|
|
|
|
for _, indexInfo := range resp.IndexInfos {
|
|
|
|
field, err := schemaHelper.GetFieldFromID(indexInfo.FieldID)
|
|
|
|
if err != nil {
|
2023-05-08 10:02:39 +08:00
|
|
|
log.Error("failed to get collection field", zap.Int64("field_id", indexInfo.FieldID), zap.Error(err))
|
2023-05-06 10:34:39 +08:00
|
|
|
return fmt.Errorf("failed to get collection field: %d", indexInfo.FieldID)
|
|
|
|
}
|
|
|
|
params := indexInfo.GetUserIndexParams()
|
|
|
|
if params == nil {
|
|
|
|
params = indexInfo.GetIndexParams()
|
|
|
|
}
|
|
|
|
desc := &milvuspb.IndexDescription{
|
|
|
|
IndexName: indexInfo.GetIndexName(),
|
|
|
|
IndexID: indexInfo.GetIndexID(),
|
|
|
|
FieldName: field.Name,
|
|
|
|
Params: params,
|
|
|
|
IndexedRows: indexInfo.GetIndexedRows(),
|
|
|
|
TotalRows: indexInfo.GetTotalRows(),
|
|
|
|
State: indexInfo.GetState(),
|
|
|
|
IndexStateFailReason: indexInfo.GetIndexStateFailReason(),
|
|
|
|
}
|
|
|
|
dit.result.IndexDescriptions = append(dit.result.IndexDescriptions, desc)
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *getIndexStatisticsTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
type dropIndexTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2022-10-08 15:38:58 +08:00
|
|
|
Condition
|
|
|
|
ctx context.Context
|
|
|
|
*milvuspb.DropIndexRequest
|
2023-09-26 09:57:25 +08:00
|
|
|
dataCoord types.DataCoordClient
|
|
|
|
queryCoord types.QueryCoordClient
|
2022-10-08 15:38:58 +08:00
|
|
|
result *commonpb.Status
|
|
|
|
|
|
|
|
collectionID UniqueID
|
2023-10-20 14:26:09 +08:00
|
|
|
|
|
|
|
replicateMsgStream msgstream.MsgStream
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) TraceCtx() context.Context {
|
|
|
|
return dit.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) ID() UniqueID {
|
|
|
|
return dit.Base.MsgID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) SetID(uid UniqueID) {
|
|
|
|
dit.Base.MsgID = uid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) Name() string {
|
|
|
|
return DropIndexTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) Type() commonpb.MsgType {
|
|
|
|
return dit.Base.MsgType
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) BeginTs() Timestamp {
|
|
|
|
return dit.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) EndTs() Timestamp {
|
|
|
|
return dit.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) SetTs(ts Timestamp) {
|
|
|
|
dit.Base.Timestamp = ts
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) OnEnqueue() error {
|
2023-10-20 14:26:09 +08:00
|
|
|
if dit.Base == nil {
|
|
|
|
dit.Base = commonpbutil.NewMsgBase()
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) PreExecute(ctx context.Context) error {
|
|
|
|
dit.Base.MsgType = commonpb.MsgType_DropIndex
|
2022-11-04 14:25:38 +08:00
|
|
|
dit.Base.SourceID = paramtable.GetNodeID()
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
collName, fieldName := dit.CollectionName, dit.FieldName
|
|
|
|
|
|
|
|
if err := validateCollectionName(collName); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2022-10-28 16:41:32 +08:00
|
|
|
if fieldName != "" {
|
|
|
|
if err := validateFieldName(fieldName); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
2023-06-25 17:20:43 +08:00
|
|
|
collID, err := globalMetaCache.GetCollectionID(ctx, dit.GetDbName(), dit.CollectionName)
|
2022-10-25 11:29:30 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
dit.collectionID = collID
|
|
|
|
|
2022-10-27 13:05:31 +08:00
|
|
|
loaded, err := isCollectionLoaded(ctx, dit.queryCoord, collID)
|
2022-10-19 23:17:44 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if loaded {
|
|
|
|
return errors.New("index cannot be dropped, collection is loaded, please release it first")
|
|
|
|
}
|
|
|
|
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) Execute(ctx context.Context) error {
|
2023-10-20 14:26:09 +08:00
|
|
|
ctxLog := log.Ctx(ctx)
|
|
|
|
ctxLog.Info("proxy drop index", zap.Int64("collID", dit.collectionID),
|
|
|
|
zap.String("field_name", dit.FieldName),
|
|
|
|
zap.String("index_name", dit.IndexName),
|
|
|
|
zap.String("db_name", dit.DbName),
|
|
|
|
)
|
|
|
|
|
2022-10-31 11:39:33 +08:00
|
|
|
var err error
|
2023-01-11 14:35:40 +08:00
|
|
|
dit.result, err = dit.dataCoord.DropIndex(ctx, &indexpb.DropIndexRequest{
|
2022-10-08 15:38:58 +08:00
|
|
|
CollectionID: dit.collectionID,
|
|
|
|
PartitionIDs: nil,
|
|
|
|
IndexName: dit.IndexName,
|
2022-10-31 11:39:33 +08:00
|
|
|
DropAll: false,
|
2022-10-08 15:38:58 +08:00
|
|
|
})
|
2023-10-20 14:26:09 +08:00
|
|
|
if err != nil {
|
|
|
|
ctxLog.Warn("drop index failed", zap.Error(err))
|
|
|
|
return err
|
|
|
|
}
|
2022-10-08 15:38:58 +08:00
|
|
|
if dit.result == nil {
|
|
|
|
return errors.New("drop index resp is nil")
|
|
|
|
}
|
|
|
|
if dit.result.ErrorCode != commonpb.ErrorCode_Success {
|
|
|
|
return errors.New(dit.result.Reason)
|
|
|
|
}
|
2023-10-20 14:26:09 +08:00
|
|
|
SendReplicateMessagePack(ctx, dit.replicateMsgStream, dit.DropIndexRequest)
|
|
|
|
return nil
|
2022-10-08 15:38:58 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
func (dit *dropIndexTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Deprecated: use describeIndexTask instead
|
|
|
|
type getIndexBuildProgressTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2022-10-08 15:38:58 +08:00
|
|
|
Condition
|
|
|
|
*milvuspb.GetIndexBuildProgressRequest
|
2023-01-04 19:37:36 +08:00
|
|
|
ctx context.Context
|
2023-09-26 09:57:25 +08:00
|
|
|
rootCoord types.RootCoordClient
|
|
|
|
dataCoord types.DataCoordClient
|
2023-01-04 19:37:36 +08:00
|
|
|
result *milvuspb.GetIndexBuildProgressResponse
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
collectionID UniqueID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) TraceCtx() context.Context {
|
|
|
|
return gibpt.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) ID() UniqueID {
|
|
|
|
return gibpt.Base.MsgID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) SetID(uid UniqueID) {
|
|
|
|
gibpt.Base.MsgID = uid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) Name() string {
|
|
|
|
return GetIndexBuildProgressTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) Type() commonpb.MsgType {
|
|
|
|
return gibpt.Base.MsgType
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) BeginTs() Timestamp {
|
|
|
|
return gibpt.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) EndTs() Timestamp {
|
|
|
|
return gibpt.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) SetTs(ts Timestamp) {
|
|
|
|
gibpt.Base.Timestamp = ts
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) OnEnqueue() error {
|
2022-10-19 10:01:26 +08:00
|
|
|
gibpt.Base = commonpbutil.NewMsgBase()
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) PreExecute(ctx context.Context) error {
|
|
|
|
gibpt.Base.MsgType = commonpb.MsgType_GetIndexBuildProgress
|
2022-11-04 14:25:38 +08:00
|
|
|
gibpt.Base.SourceID = paramtable.GetNodeID()
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
if err := validateCollectionName(gibpt.CollectionName); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) Execute(ctx context.Context) error {
|
|
|
|
collectionName := gibpt.CollectionName
|
2023-06-25 17:20:43 +08:00
|
|
|
collectionID, err := globalMetaCache.GetCollectionID(ctx, gibpt.GetDbName(), collectionName)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil { // err is not nil if collection not exists
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
gibpt.collectionID = collectionID
|
|
|
|
|
2023-01-11 14:35:40 +08:00
|
|
|
resp, err := gibpt.dataCoord.GetIndexBuildProgress(ctx, &indexpb.GetIndexBuildProgressRequest{
|
2022-10-08 15:38:58 +08:00
|
|
|
CollectionID: collectionID,
|
|
|
|
IndexName: gibpt.IndexName,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
gibpt.result = &milvuspb.GetIndexBuildProgressResponse{
|
|
|
|
Status: resp.Status,
|
|
|
|
TotalRows: resp.GetTotalRows(),
|
|
|
|
IndexedRows: resp.GetIndexedRows(),
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gibpt *getIndexBuildProgressTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Deprecated: use describeIndexTask instead
|
|
|
|
type getIndexStateTask struct {
|
2024-02-21 09:52:59 +08:00
|
|
|
baseTask
|
2022-10-08 15:38:58 +08:00
|
|
|
Condition
|
|
|
|
*milvuspb.GetIndexStateRequest
|
2023-01-04 19:37:36 +08:00
|
|
|
ctx context.Context
|
2023-09-26 09:57:25 +08:00
|
|
|
dataCoord types.DataCoordClient
|
|
|
|
rootCoord types.RootCoordClient
|
2023-01-04 19:37:36 +08:00
|
|
|
result *milvuspb.GetIndexStateResponse
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
collectionID UniqueID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) TraceCtx() context.Context {
|
|
|
|
return gist.ctx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) ID() UniqueID {
|
|
|
|
return gist.Base.MsgID
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) SetID(uid UniqueID) {
|
|
|
|
gist.Base.MsgID = uid
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) Name() string {
|
|
|
|
return GetIndexStateTaskName
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) Type() commonpb.MsgType {
|
|
|
|
return gist.Base.MsgType
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) BeginTs() Timestamp {
|
|
|
|
return gist.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) EndTs() Timestamp {
|
|
|
|
return gist.Base.Timestamp
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) SetTs(ts Timestamp) {
|
|
|
|
gist.Base.Timestamp = ts
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) OnEnqueue() error {
|
2022-10-19 10:01:26 +08:00
|
|
|
gist.Base = commonpbutil.NewMsgBase()
|
2022-10-08 15:38:58 +08:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) PreExecute(ctx context.Context) error {
|
|
|
|
gist.Base.MsgType = commonpb.MsgType_GetIndexState
|
2022-11-04 14:25:38 +08:00
|
|
|
gist.Base.SourceID = paramtable.GetNodeID()
|
2022-10-08 15:38:58 +08:00
|
|
|
|
|
|
|
if err := validateCollectionName(gist.CollectionName); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) Execute(ctx context.Context) error {
|
2023-06-25 17:20:43 +08:00
|
|
|
collectionID, err := globalMetaCache.GetCollectionID(ctx, gist.GetDbName(), gist.CollectionName)
|
2022-10-08 15:38:58 +08:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2023-01-11 14:35:40 +08:00
|
|
|
state, err := gist.dataCoord.GetIndexState(ctx, &indexpb.GetIndexStateRequest{
|
2022-10-08 15:38:58 +08:00
|
|
|
CollectionID: collectionID,
|
|
|
|
IndexName: gist.IndexName,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
gist.result = &milvuspb.GetIndexStateResponse{
|
2023-10-11 21:01:35 +08:00
|
|
|
Status: merr.Success(),
|
2022-10-08 15:38:58 +08:00
|
|
|
State: state.GetState(),
|
|
|
|
FailReason: state.GetFailReason(),
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (gist *getIndexStateTask) PostExecute(ctx context.Context) error {
|
|
|
|
return nil
|
|
|
|
}
|