enhance: Skip pick worker when task doesn't need to execute actually (#34348)
issue: #34347
Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
commit feb13cdf07 (parent 8165961223)
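The change splits the old `AssignTask(ctx, client, dependency) (bool, bool)` into a `PreCheck` step that runs before any worker is picked and a slimmer `AssignTask(ctx, client) bool`, so a task that can be resolved from metadata alone never consumes an IndexNode slot. Below is a minimal, self-contained sketch of that control flow, assuming simplified stand-in types; `scheduler`, `workerClient` and `task` here are illustrative only and are not the actual datacoord types.

```go
package main

import (
	"context"
	"fmt"
)

// Simplified stand-ins for the scheduler types used in this commit.
type workerClient struct{ id int64 }

type task interface {
	// PreCheck returns true when the task is already resolvable from metadata
	// (missing meta, unhealthy segment, tiny segment, flat index, ...) and no
	// worker needs to be picked.
	PreCheck(ctx context.Context) (skip bool)
	// AssignTask sends the prepared request to a worker and reports success.
	AssignTask(ctx context.Context, client *workerClient) bool
}

type scheduler struct {
	pickClient func() *workerClient // stand-in for nodeManager.PickClient
}

// process mirrors the revised control flow: pre-check first, pick a worker
// only for tasks that really have to run.
func (s *scheduler) process(ctx context.Context, t task) bool {
	if t.PreCheck(ctx) {
		return true // task finished or dropped without consuming a worker slot
	}
	client := s.pickClient()
	if client == nil {
		return false // no worker available, retry on the next round
	}
	return t.AssignTask(ctx, client)
}

// tinySegmentTask imitates the "segment does not need index really" branch:
// PreCheck marks it finished and the scheduler never touches a worker.
type tinySegmentTask struct{}

func (tinySegmentTask) PreCheck(context.Context) bool                  { return true }
func (tinySegmentTask) AssignTask(context.Context, *workerClient) bool { return true }

func main() {
	s := &scheduler{pickClient: func() *workerClient { return &workerClient{id: 1} }}
	fmt.Println(s.process(context.Background(), tinySegmentTask{})) // true, no worker picked
}
```

The real process() additionally updates the task version and persists state between picking the client and assigning the task; those steps are omitted here for brevity.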
@@ -72,7 +72,7 @@ func (s *Server) createIndexForSegment(segment *SegmentInfo, indexID UniqueID) e
 		return err
 	}
 	s.taskScheduler.enqueue(&indexBuildTask{
-		buildID: buildID,
+		taskID: buildID,
 		taskInfo: &indexpb.IndexTaskInfo{
 			BuildID: buildID,
 			State: commonpb.IndexState_Unissued,
@@ -34,10 +34,14 @@ import (
 	"github.com/milvus-io/milvus/pkg/util/typeutil"
 )
 
+var _ Task = (*analyzeTask)(nil)
+
 type analyzeTask struct {
 	taskID int64
 	nodeID int64
 	taskInfo *indexpb.AnalyzeResult
+
+	req *indexpb.AnalyzeRequest
 }
 
 func (at *analyzeTask) GetTaskID() int64 {
@@ -82,12 +86,12 @@ func (at *analyzeTask) UpdateMetaBuildingState(nodeID int64, meta *meta) error {
 	return nil
 }
 
-func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeClient, dependency *taskScheduler) (bool, bool) {
+func (at *analyzeTask) PreCheck(ctx context.Context, dependency *taskScheduler) bool {
 	t := dependency.meta.analyzeMeta.GetTask(at.GetTaskID())
 	if t == nil {
 		log.Ctx(ctx).Info("task is nil, delete it", zap.Int64("taskID", at.GetTaskID()))
 		at.SetState(indexpb.JobState_JobStateNone, "analyze task is nil")
-		return false, false
+		return true
 	}
 
 	var storageConfig *indexpb.StorageConfig
@@ -113,7 +117,7 @@ func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeCli
 			RequestTimeoutMs: Params.MinioCfg.RequestTimeoutMs.GetAsInt64(),
 		}
 	}
-	req := &indexpb.AnalyzeRequest{
+	at.req = &indexpb.AnalyzeRequest{
 		ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
 		TaskID: at.GetTaskID(),
 		CollectionID: t.CollectionID,
@@ -123,7 +127,7 @@ func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeCli
 		FieldType: t.FieldType,
 		Dim: t.Dim,
 		SegmentStats: make(map[int64]*indexpb.SegmentStats),
-		Version: t.Version,
+		Version: t.Version + 1,
 		StorageConfig: storageConfig,
 	}
 
@@ -142,13 +146,13 @@ func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeCli
 			log.Ctx(ctx).Warn("analyze stats task is processing, but segment is nil, delete the task",
 				zap.Int64("taskID", at.GetTaskID()), zap.Int64("segmentID", segID))
 			at.SetState(indexpb.JobState_JobStateFailed, fmt.Sprintf("segmentInfo with ID: %d is nil", segID))
-			return false, false
+			return true
 		}
 
 		totalSegmentsRows += info.GetNumOfRows()
 		// get binlogIDs
 		binlogIDs := getBinLogIDs(info, t.FieldID)
-		req.SegmentStats[segID] = &indexpb.SegmentStats{
+		at.req.SegmentStats[segID] = &indexpb.SegmentStats{
 			ID: segID,
 			NumRows: info.GetNumOfRows(),
 			LogIDs: binlogIDs,
@@ -160,7 +164,7 @@ func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeCli
 		log.Ctx(ctx).Info("analyze task get collection info failed", zap.Int64("collectionID",
 			segments[0].GetCollectionID()), zap.Error(err))
 		at.SetState(indexpb.JobState_JobStateInit, err.Error())
-		return false, false
+		return true
 	}
 
 	schema := collInfo.Schema
@@ -175,35 +179,39 @@ func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeCli
 	dim, err := storage.GetDimFromParams(field.TypeParams)
 	if err != nil {
 		at.SetState(indexpb.JobState_JobStateInit, err.Error())
-		return false, false
+		return true
 	}
-	req.Dim = int64(dim)
+	at.req.Dim = int64(dim)
 
 	totalSegmentsRawDataSize := float64(totalSegmentsRows) * float64(dim) * typeutil.VectorTypeSize(t.FieldType) // Byte
 	numClusters := int64(math.Ceil(totalSegmentsRawDataSize / float64(Params.DataCoordCfg.ClusteringCompactionPreferSegmentSize.GetAsSize())))
 	if numClusters < Params.DataCoordCfg.ClusteringCompactionMinCentroidsNum.GetAsInt64() {
 		log.Ctx(ctx).Info("data size is too small, skip analyze task", zap.Float64("raw data size", totalSegmentsRawDataSize), zap.Int64("num clusters", numClusters), zap.Int64("minimum num clusters required", Params.DataCoordCfg.ClusteringCompactionMinCentroidsNum.GetAsInt64()))
 		at.SetState(indexpb.JobState_JobStateFinished, "")
-		return true, true
+		return true
 	}
 	if numClusters > Params.DataCoordCfg.ClusteringCompactionMaxCentroidsNum.GetAsInt64() {
 		numClusters = Params.DataCoordCfg.ClusteringCompactionMaxCentroidsNum.GetAsInt64()
 	}
-	req.NumClusters = numClusters
-	req.MaxTrainSizeRatio = Params.DataCoordCfg.ClusteringCompactionMaxTrainSizeRatio.GetAsFloat() // control clustering train data size
+	at.req.NumClusters = numClusters
+	at.req.MaxTrainSizeRatio = Params.DataCoordCfg.ClusteringCompactionMaxTrainSizeRatio.GetAsFloat() // control clustering train data size
 	// config to detect data skewness
-	req.MinClusterSizeRatio = Params.DataCoordCfg.ClusteringCompactionMinClusterSizeRatio.GetAsFloat()
-	req.MaxClusterSizeRatio = Params.DataCoordCfg.ClusteringCompactionMaxClusterSizeRatio.GetAsFloat()
-	req.MaxClusterSize = Params.DataCoordCfg.ClusteringCompactionMaxClusterSize.GetAsSize()
+	at.req.MinClusterSizeRatio = Params.DataCoordCfg.ClusteringCompactionMinClusterSizeRatio.GetAsFloat()
+	at.req.MaxClusterSizeRatio = Params.DataCoordCfg.ClusteringCompactionMaxClusterSizeRatio.GetAsFloat()
+	at.req.MaxClusterSize = Params.DataCoordCfg.ClusteringCompactionMaxClusterSize.GetAsSize()
 
+	return false
+}
+
+func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeClient) bool {
 	ctx, cancel := context.WithTimeout(context.Background(), reqTimeoutInterval)
 	defer cancel()
 	resp, err := client.CreateJobV2(ctx, &indexpb.CreateJobV2Request{
-		ClusterID: req.GetClusterID(),
-		TaskID:    req.GetTaskID(),
+		ClusterID: at.req.GetClusterID(),
+		TaskID:    at.req.GetTaskID(),
 		JobType:   indexpb.JobType_JobTypeAnalyzeJob,
 		Request: &indexpb.CreateJobV2Request_AnalyzeRequest{
-			AnalyzeRequest: req,
+			AnalyzeRequest: at.req,
 		},
 	})
 	if err == nil {
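As an annotation on the analyze pre-check above: the cluster count is derived from the raw vector size and clamped to the configured centroid bounds, and a count below the minimum finishes the task without ever reaching a worker. The snippet below restates that sizing rule in isolation; the function name and the parameter values are made-up placeholders, not Milvus defaults.

```go
package main

import (
	"fmt"
	"math"
)

// numClustersFor re-states how the analyze PreCheck sizes the clustering job.
func numClustersFor(totalRows, dim int64, bytesPerValue float64, preferSegmentSize, minCentroids, maxCentroids int64) (n int64, skip bool) {
	rawDataSize := float64(totalRows) * float64(dim) * bytesPerValue // bytes
	n = int64(math.Ceil(rawDataSize / float64(preferSegmentSize)))
	if n < minCentroids {
		// the diff marks the task Finished here: data too small to be worth analyzing
		return n, true
	}
	if n > maxCentroids {
		n = maxCentroids
	}
	return n, false
}

func main() {
	// e.g. 1M rows of 128-dim float32 vectors with a 512 MiB preferred segment size
	n, skip := numClustersFor(1_000_000, 128, 4, 512<<20, 16, 10240)
	fmt.Println(n, skip) // 1 true -> the analyze task would be finished without running
}
```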
@@ -212,12 +220,12 @@ func (at *analyzeTask) AssignTask(ctx context.Context, client types.IndexNodeCli
 	if err != nil {
 		log.Ctx(ctx).Warn("assign analyze task to indexNode failed", zap.Int64("taskID", at.GetTaskID()), zap.Error(err))
 		at.SetState(indexpb.JobState_JobStateRetry, err.Error())
-		return false, true
+		return false
 	}
 
 	log.Ctx(ctx).Info("analyze task assigned successfully", zap.Int64("taskID", at.GetTaskID()))
 	at.SetState(indexpb.JobState_JobStateInProgress, "")
-	return true, false
+	return true
 }
 
 func (at *analyzeTask) setResult(result *indexpb.AnalyzeResult) {
@@ -37,13 +37,17 @@ import (
 )
 
 type indexBuildTask struct {
-	buildID int64
+	taskID int64
 	nodeID int64
 	taskInfo *indexpb.IndexTaskInfo
+
+	req *indexpb.CreateJobRequest
 }
 
+var _ Task = (*indexBuildTask)(nil)
+
 func (it *indexBuildTask) GetTaskID() int64 {
-	return it.buildID
+	return it.taskID
 }
 
 func (it *indexBuildTask) GetNodeID() int64 {
@@ -73,35 +77,35 @@ func (it *indexBuildTask) GetFailReason() string {
 }
 
 func (it *indexBuildTask) UpdateVersion(ctx context.Context, meta *meta) error {
-	return meta.indexMeta.UpdateVersion(it.buildID)
+	return meta.indexMeta.UpdateVersion(it.taskID)
 }
 
 func (it *indexBuildTask) UpdateMetaBuildingState(nodeID int64, meta *meta) error {
 	it.nodeID = nodeID
-	return meta.indexMeta.BuildIndex(it.buildID, nodeID)
+	return meta.indexMeta.BuildIndex(it.taskID, nodeID)
 }
 
-func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNodeClient, dependency *taskScheduler) (bool, bool) {
-	segIndex, exist := dependency.meta.indexMeta.GetIndexJob(it.buildID)
+func (it *indexBuildTask) PreCheck(ctx context.Context, dependency *taskScheduler) bool {
+	segIndex, exist := dependency.meta.indexMeta.GetIndexJob(it.taskID)
 	if !exist || segIndex == nil {
-		log.Ctx(ctx).Info("index task has not exist in meta table, remove task", zap.Int64("buildID", it.buildID))
+		log.Ctx(ctx).Info("index task has not exist in meta table, remove task", zap.Int64("taskID", it.taskID))
 		it.SetState(indexpb.JobState_JobStateNone, "index task has not exist in meta table")
-		return false, false
+		return true
 	}
 
 	segment := dependency.meta.GetSegment(segIndex.SegmentID)
 	if !isSegmentHealthy(segment) || !dependency.meta.indexMeta.IsIndexExist(segIndex.CollectionID, segIndex.IndexID) {
-		log.Ctx(ctx).Info("task is no need to build index, remove it", zap.Int64("buildID", it.buildID))
+		log.Ctx(ctx).Info("task is no need to build index, remove it", zap.Int64("taskID", it.taskID))
 		it.SetState(indexpb.JobState_JobStateNone, "task is no need to build index")
-		return false, false
+		return true
 	}
 	indexParams := dependency.meta.indexMeta.GetIndexParams(segIndex.CollectionID, segIndex.IndexID)
 	indexType := GetIndexType(indexParams)
 	if isFlatIndex(indexType) || segIndex.NumRows < Params.DataCoordCfg.MinSegmentNumRowsToEnableIndex.GetAsInt64() {
-		log.Ctx(ctx).Info("segment does not need index really", zap.Int64("buildID", it.buildID),
+		log.Ctx(ctx).Info("segment does not need index really", zap.Int64("taskID", it.taskID),
 			zap.Int64("segmentID", segIndex.SegmentID), zap.Int64("num rows", segIndex.NumRows))
 		it.SetState(indexpb.JobState_JobStateFinished, "fake finished index success")
-		return true, true
+		return true
 	}
 	// vector index build needs information of optional scalar fields data
 	optionalFields := make([]*indexpb.OptionalFieldInfo, 0)
@@ -110,12 +114,12 @@ func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNode
 	if err != nil || collInfo == nil {
 		log.Ctx(ctx).Warn("get collection failed", zap.Int64("collID", segIndex.CollectionID), zap.Error(err))
 		it.SetState(indexpb.JobState_JobStateInit, err.Error())
-		return false, false
+		return true
 	}
 	colSchema := collInfo.Schema
 	partitionKeyField, err := typeutil.GetPartitionKeyFieldSchema(colSchema)
 	if partitionKeyField == nil || err != nil {
-		log.Ctx(ctx).Warn("index builder get partition key field failed", zap.Int64("buildID", it.buildID), zap.Error(err))
+		log.Ctx(ctx).Warn("index builder get partition key field failed", zap.Int64("taskID", it.taskID), zap.Error(err))
 	} else {
 		if typeutil.IsFieldDataTypeSupportMaterializedView(partitionKeyField) {
 			optionalFields = append(optionalFields, &indexpb.OptionalFieldInfo{
@@ -161,16 +165,16 @@ func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNode
 		var err error
 		indexParams, err = indexparams.UpdateDiskIndexBuildParams(Params, indexParams)
 		if err != nil {
-			log.Ctx(ctx).Warn("failed to append index build params", zap.Int64("buildID", it.buildID), zap.Error(err))
+			log.Ctx(ctx).Warn("failed to append index build params", zap.Int64("taskID", it.taskID), zap.Error(err))
 			it.SetState(indexpb.JobState_JobStateInit, err.Error())
-			return false, false
+			return true
 		}
 	}
-	var req *indexpb.CreateJobRequest
+
 	collectionInfo, err := dependency.handler.GetCollection(ctx, segment.GetCollectionID())
 	if err != nil {
 		log.Ctx(ctx).Info("index builder get collection info failed", zap.Int64("collectionID", segment.GetCollectionID()), zap.Error(err))
-		return false, false
+		return true
 	}
 
 	schema := collectionInfo.Schema
@@ -183,7 +187,7 @@ func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNode
 			}
 		}
 
-		dim, err := storage.GetDimFromParams(field.TypeParams)
+		dim, err := storage.GetDimFromParams(field.GetTypeParams())
 		if err != nil {
 			log.Ctx(ctx).Warn("failed to get dim from field type params",
 				zap.String("field type", field.GetDataType().String()), zap.Error(err))
@@ -195,84 +199,90 @@ func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNode
 		if err != nil {
 			log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
 			it.SetState(indexpb.JobState_JobStateInit, err.Error())
-			return false, false
+			return true
 		}
 		indexStorePath, err := itypeutil.GetStorageURI(params.Params.CommonCfg.StorageScheme.GetValue(), params.Params.CommonCfg.StoragePathPrefix.GetValue()+"/index", segment.GetID())
 		if err != nil {
 			log.Ctx(ctx).Warn("failed to get storage uri", zap.Error(err))
 			it.SetState(indexpb.JobState_JobStateInit, err.Error())
-			return false, false
+			return true
 		}
 
-		req = &indexpb.CreateJobRequest{
+		it.req = &indexpb.CreateJobRequest{
 			ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
 			IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
-			BuildID: it.buildID,
-			IndexVersion: segIndex.IndexVersion,
+			BuildID: it.taskID,
+			IndexVersion: segIndex.IndexVersion + 1,
 			StorageConfig: storageConfig,
 			IndexParams: indexParams,
 			TypeParams: typeParams,
 			NumRows: segIndex.NumRows,
+			CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
 			CollectionID: segment.GetCollectionID(),
 			PartitionID: segment.GetPartitionID(),
 			SegmentID: segment.GetID(),
 			FieldID: fieldID,
-			FieldName: field.Name,
-			FieldType: field.DataType,
+			FieldName: field.GetName(),
+			FieldType: field.GetDataType(),
 			StorePath: storePath,
 			StoreVersion: segment.GetStorageVersion(),
 			IndexStorePath: indexStorePath,
 			Dim: int64(dim),
-			CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
 			DataIds: binlogIDs,
 			OptionalScalarFields: optionalFields,
 			Field: field,
 		}
 	} else {
-		req = &indexpb.CreateJobRequest{
+		it.req = &indexpb.CreateJobRequest{
 			ClusterID: Params.CommonCfg.ClusterPrefix.GetValue(),
 			IndexFilePrefix: path.Join(dependency.chunkManager.RootPath(), common.SegmentIndexPath),
-			BuildID: it.buildID,
-			IndexVersion: segIndex.IndexVersion,
+			BuildID: it.taskID,
+			IndexVersion: segIndex.IndexVersion + 1,
 			StorageConfig: storageConfig,
 			IndexParams: indexParams,
 			TypeParams: typeParams,
 			NumRows: segIndex.NumRows,
 			CurrentIndexVersion: dependency.indexEngineVersionManager.GetCurrentIndexEngineVersion(),
-			DataIds: binlogIDs,
 			CollectionID: segment.GetCollectionID(),
 			PartitionID: segment.GetPartitionID(),
 			SegmentID: segment.GetID(),
 			FieldID: fieldID,
-			OptionalScalarFields: optionalFields,
+			FieldName: field.GetName(),
+			FieldType: field.GetDataType(),
 			Dim: int64(dim),
+			DataIds: binlogIDs,
+			OptionalScalarFields: optionalFields,
 			Field: field,
 		}
 	}
 
+	log.Ctx(ctx).Info("index task pre check successfully", zap.Int64("taskID", it.GetTaskID()))
+	return false
+}
+
+func (it *indexBuildTask) AssignTask(ctx context.Context, client types.IndexNodeClient) bool {
 	ctx, cancel := context.WithTimeout(context.Background(), reqTimeoutInterval)
 	defer cancel()
 	resp, err := client.CreateJobV2(ctx, &indexpb.CreateJobV2Request{
-		ClusterID: req.GetClusterID(),
-		TaskID:    req.GetBuildID(),
+		ClusterID: it.req.GetClusterID(),
+		TaskID:    it.req.GetBuildID(),
 		JobType:   indexpb.JobType_JobTypeIndexJob,
 		Request: &indexpb.CreateJobV2Request_IndexRequest{
-			IndexRequest: req,
+			IndexRequest: it.req,
 		},
 	})
 	if err == nil {
 		err = merr.Error(resp)
 	}
 	if err != nil {
-		log.Ctx(ctx).Warn("assign index task to indexNode failed", zap.Int64("buildID", it.buildID), zap.Error(err))
+		log.Ctx(ctx).Warn("assign index task to indexNode failed", zap.Int64("taskID", it.taskID), zap.Error(err))
 		it.SetState(indexpb.JobState_JobStateRetry, err.Error())
-		return false, true
+		return false
 	}
 
-	log.Ctx(ctx).Info("index task assigned successfully", zap.Int64("buildID", it.buildID),
-		zap.Int64("segmentID", segIndex.SegmentID))
+	log.Ctx(ctx).Info("index task assigned successfully", zap.Int64("taskID", it.taskID))
 	it.SetState(indexpb.JobState_JobStateInProgress, "")
-	return true, false
+	return true
 }
 
 func (it *indexBuildTask) setResult(info *indexpb.IndexTaskInfo) {
@@ -289,7 +299,7 @@ func (it *indexBuildTask) QueryResult(ctx context.Context, node types.IndexNodeC
 		err = merr.Error(resp.GetStatus())
 	}
 	if err != nil {
-		log.Ctx(ctx).Warn("get jobs info from IndexNode failed", zap.Int64("buildID", it.GetTaskID()),
+		log.Ctx(ctx).Warn("get jobs info from IndexNode failed", zap.Int64("taskID", it.GetTaskID()),
 			zap.Int64("nodeID", it.GetNodeID()), zap.Error(err))
 		it.SetState(indexpb.JobState_JobStateRetry, err.Error())
 		return
@@ -100,8 +100,8 @@ func (s *taskScheduler) reloadFromKV() {
 			}
 			if segIndex.IndexState != commonpb.IndexState_Finished && segIndex.IndexState != commonpb.IndexState_Failed {
 				s.tasks[segIndex.BuildID] = &indexBuildTask{
-					buildID: segIndex.BuildID,
+					taskID: segIndex.BuildID,
 					nodeID: segIndex.NodeID,
 					taskInfo: &indexpb.IndexTaskInfo{
 						BuildID: segIndex.BuildID,
 						State: segIndex.IndexState,
@@ -223,6 +223,12 @@ func (s *taskScheduler) process(taskID UniqueID) bool {
 		s.removeTask(taskID)
 
 	case indexpb.JobState_JobStateInit:
+		// 0. pre check task
+		skip := task.PreCheck(s.ctx, s)
+		if skip {
+			return true
+		}
+
 		// 1. pick an indexNode client
 		nodeID, client := s.nodeManager.PickClient()
 		if client == nil {
|
|||||||
log.Ctx(s.ctx).Info("update task version success", zap.Int64("taskID", taskID))
|
log.Ctx(s.ctx).Info("update task version success", zap.Int64("taskID", taskID))
|
||||||
|
|
||||||
// 3. assign task to indexNode
|
// 3. assign task to indexNode
|
||||||
success, skip := task.AssignTask(s.ctx, client, s)
|
success := task.AssignTask(s.ctx, client)
|
||||||
if !success {
|
if !success {
|
||||||
log.Ctx(s.ctx).Warn("assign task to client failed", zap.Int64("taskID", taskID),
|
log.Ctx(s.ctx).Warn("assign task to client failed", zap.Int64("taskID", taskID),
|
||||||
zap.String("new state", task.GetState().String()), zap.String("fail reason", task.GetFailReason()))
|
zap.String("new state", task.GetState().String()), zap.String("fail reason", task.GetFailReason()))
|
||||||
// If the problem is caused by the task itself, subsequent tasks will not be skipped.
|
// If the problem is caused by the task itself, subsequent tasks will not be skipped.
|
||||||
// If etcd fails or fails to send tasks to the node, the subsequent tasks will be skipped.
|
// If etcd fails or fails to send tasks to the node, the subsequent tasks will be skipped.
|
||||||
return !skip
|
return false
|
||||||
}
|
|
||||||
if skip {
|
|
||||||
// create index for small segment(<1024), skip next steps.
|
|
||||||
return true
|
|
||||||
}
|
}
|
||||||
log.Ctx(s.ctx).Info("assign task to client success", zap.Int64("taskID", taskID), zap.Int64("nodeID", nodeID))
|
log.Ctx(s.ctx).Info("assign task to client success", zap.Int64("taskID", taskID), zap.Int64("nodeID", nodeID))
|
||||||
|
|
||||||
|
@@ -927,7 +927,6 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
 	ctx := context.Background()
 
 	catalog := catalogmocks.NewDataCoordCatalog(s.T())
-	in := mocks.NewMockIndexNodeClient(s.T())
 	workerManager := NewMockWorkerManager(s.T())
 
 	mt := createMeta(catalog,
@@ -958,9 +957,7 @@ func (s *taskSchedulerSuite) Test_analyzeTaskFailCase() {
 	scheduler.scheduleDuration = s.duration
 	scheduler.Start()
 
-	// taskID 1 peek client success, update version success. AssignTask failed --> state: Failed --> save
-	workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
-	catalog.EXPECT().SaveAnalyzeTask(mock.Anything, mock.Anything).Return(nil).Once()
+	// taskID 1 PreCheck failed --> state: Failed --> save
 	catalog.EXPECT().SaveAnalyzeTask(mock.Anything, mock.Anything).Return(nil).Once()
 	workerManager.EXPECT().GetClientByID(mock.Anything).Return(nil, false).Once()
 
@@ -1298,14 +1295,10 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
 	defer Params.CommonCfg.EnableStorageV2.SwapTempValue("False")
 	scheduler.Start()
 
-	// peek client success, update version success, get collection info failed --> init
-	workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
-	catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
+	// get collection info failed --> init
 	handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
 
-	// peek client success, update version success, partition key field is nil, get collection info failed --> init
-	workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
-	catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
+	// partition key field is nil, get collection info failed --> init
 	handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
 		ID: collID,
 		Schema: &schemapb.CollectionSchema{
@@ -1316,9 +1309,7 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
 	}, nil).Once()
 	handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(nil, errors.New("mock error")).Once()
 
-	// peek client success, update version success, get collection info success, get dim failed --> init
-	workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
-	catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
+	// get collection info success, get dim failed --> init
 	handler.EXPECT().GetCollection(mock.Anything, mock.Anything).Return(&collectionInfo{
 		ID: collID,
 		Schema: &schemapb.CollectionSchema{
@@ -1331,8 +1322,6 @@ func (s *taskSchedulerSuite) Test_indexTaskFailCase() {
 
 	// peek client success, update version success, get collection info success, get dim success, get storage uri failed --> init
 	s.NoError(err)
-	workerManager.EXPECT().PickClient().Return(s.nodeID, in).Once()
-	catalog.EXPECT().AlterSegmentIndexes(mock.Anything, mock.Anything).Return(nil).Once()
 	handler.EXPECT().GetCollection(mock.Anything, mock.Anything).RunAndReturn(func(ctx context.Context, i int64) (*collectionInfo, error) {
 		return &collectionInfo{
 			ID: collID,
@@ -1676,14 +1665,12 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
 			return merr.Success(), nil
 		}).Once()
 	t := &indexBuildTask{
-		buildID: buildID,
+		taskID: buildID,
 		nodeID: nodeID,
 		taskInfo: &indexpb.IndexTaskInfo{
 			BuildID: buildID,
 			State: commonpb.IndexState_Unissued,
 			FailReason: "",
-			// CurrentIndexVersion: 0,
-			// IndexStoreVersion: 0,
 		},
 	}
 	scheduler.enqueue(t)
@@ -1701,8 +1688,8 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
 			return merr.Success(), nil
 		}).Once()
 	t := &indexBuildTask{
-		buildID: buildID,
+		taskID: buildID,
 		nodeID: nodeID,
 		taskInfo: &indexpb.IndexTaskInfo{
 			BuildID: buildID,
 			State: commonpb.IndexState_Unissued,
@@ -1730,8 +1717,8 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
 			return merr.Success(), nil
 		}).Once()
 	t := &indexBuildTask{
-		buildID: buildID,
+		taskID: buildID,
 		nodeID: nodeID,
 		taskInfo: &indexpb.IndexTaskInfo{
 			BuildID: buildID,
 			State: commonpb.IndexState_Unissued,
@@ -1753,8 +1740,8 @@ func (s *taskSchedulerSuite) Test_indexTaskWithMvOptionalScalarField() {
 			return merr.Success(), nil
 		}).Once()
 	t := &indexBuildTask{
-		buildID: buildID,
+		taskID: buildID,
 		nodeID: nodeID,
 		taskInfo: &indexpb.IndexTaskInfo{
 			BuildID: buildID,
 			State: commonpb.IndexState_Unissued,
@@ -27,13 +27,14 @@ type Task interface {
 	GetTaskID() int64
 	GetNodeID() int64
 	ResetNodeID()
+	PreCheck(ctx context.Context, dependency *taskScheduler) bool
 	CheckTaskHealthy(mt *meta) bool
 	SetState(state indexpb.JobState, failReason string)
 	GetState() indexpb.JobState
 	GetFailReason() string
 	UpdateVersion(ctx context.Context, meta *meta) error
 	UpdateMetaBuildingState(nodeID int64, meta *meta) error
-	AssignTask(ctx context.Context, client types.IndexNodeClient, dependency *taskScheduler) (bool, bool)
+	AssignTask(ctx context.Context, client types.IndexNodeClient) bool
 	QueryResult(ctx context.Context, client types.IndexNodeClient)
 	DropTaskOnWorker(ctx context.Context, client types.IndexNodeClient) bool
 	SetJobInfo(meta *meta) error
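For reference, a hedged sketch of how an implementation of the revised Task contract behaves after this commit: PreCheck reports whether the scheduler can stop before picking a worker, and AssignTask only reports whether the worker accepted the job. The types below are placeholders standing in for the real types.IndexNodeClient, *taskScheduler and indexpb.JobState dependencies.

```go
package main

import (
	"context"
	"fmt"
)

// Illustrative stand-ins only; not the datacoord types.
type fakeClient struct{}

type fakeTask struct{ state string }

// PreCheck: true means "nothing to send to a worker this round" -- the task is
// already terminal (None/Failed/Finished) or was pushed back to Init by a
// failed pre-check, exactly the cases where the diff now avoids PickClient.
func (t *fakeTask) PreCheck(ctx context.Context) bool {
	return t.state != "init"
}

// AssignTask: true means the worker accepted the job (state -> InProgress),
// false means the call failed and the task should be retried later.
func (t *fakeTask) AssignTask(ctx context.Context, c *fakeClient) bool {
	t.state = "inProgress"
	return true
}

func main() {
	t := &fakeTask{state: "init"}
	if !t.PreCheck(context.Background()) {
		fmt.Println(t.AssignTask(context.Background(), &fakeClient{})) // true
	}
}
```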