milvus/internal/indexnode/taskinfo_ops.go
cai.zhang 27cc9f2630
enhance: Support analyze data (#33651)
issue: #30633

Signed-off-by: Cai Zhang <cai.zhang@zilliz.com>
Co-authored-by: chasingegg <chao.gao@zilliz.com>
2024-06-06 17:37:51 +08:00

288 lines
7.7 KiB
Go

package indexnode
import (
"context"
"time"
"go.uber.org/zap"
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
"github.com/milvus-io/milvus/internal/proto/indexpb"
"github.com/milvus-io/milvus/pkg/common"
"github.com/milvus-io/milvus/pkg/log"
)
type indexTaskInfo struct {
cancel context.CancelFunc
state commonpb.IndexState
fileKeys []string
serializedSize uint64
failReason string
currentIndexVersion int32
indexStoreVersion int64
// task statistics
statistic *indexpb.JobInfo
}
func (i *IndexNode) loadOrStoreIndexTask(ClusterID string, buildID UniqueID, info *indexTaskInfo) *indexTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
oldInfo, ok := i.indexTasks[key]
if ok {
return oldInfo
}
i.indexTasks[key] = info
return nil
}
func (i *IndexNode) loadIndexTaskState(ClusterID string, buildID UniqueID) commonpb.IndexState {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
task, ok := i.indexTasks[key]
if !ok {
return commonpb.IndexState_IndexStateNone
}
return task.state
}
func (i *IndexNode) storeIndexTaskState(ClusterID string, buildID UniqueID, state commonpb.IndexState, failReason string) {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if task, ok := i.indexTasks[key]; ok {
log.Debug("IndexNode store task state", zap.String("clusterID", ClusterID), zap.Int64("buildID", buildID),
zap.String("state", state.String()), zap.String("fail reason", failReason))
task.state = state
task.failReason = failReason
}
}
func (i *IndexNode) foreachIndexTaskInfo(fn func(ClusterID string, buildID UniqueID, info *indexTaskInfo)) {
i.stateLock.Lock()
defer i.stateLock.Unlock()
for key, info := range i.indexTasks {
fn(key.ClusterID, key.BuildID, info)
}
}
func (i *IndexNode) storeIndexFilesAndStatistic(
ClusterID string,
buildID UniqueID,
fileKeys []string,
serializedSize uint64,
currentIndexVersion int32,
) {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.indexTasks[key]; ok {
info.fileKeys = common.CloneStringList(fileKeys)
info.serializedSize = serializedSize
info.currentIndexVersion = currentIndexVersion
return
}
}
func (i *IndexNode) storeIndexFilesAndStatisticV2(
ClusterID string,
buildID UniqueID,
fileKeys []string,
serializedSize uint64,
currentIndexVersion int32,
indexStoreVersion int64,
) {
key := taskKey{ClusterID: ClusterID, BuildID: buildID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.indexTasks[key]; ok {
info.fileKeys = common.CloneStringList(fileKeys)
info.serializedSize = serializedSize
info.currentIndexVersion = currentIndexVersion
info.indexStoreVersion = indexStoreVersion
return
}
}
func (i *IndexNode) deleteIndexTaskInfos(ctx context.Context, keys []taskKey) []*indexTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
deleted := make([]*indexTaskInfo, 0, len(keys))
for _, key := range keys {
info, ok := i.indexTasks[key]
if ok {
deleted = append(deleted, info)
delete(i.indexTasks, key)
log.Ctx(ctx).Info("delete task infos",
zap.String("cluster_id", key.ClusterID), zap.Int64("build_id", key.BuildID))
}
}
return deleted
}
func (i *IndexNode) deleteAllIndexTasks() []*indexTaskInfo {
i.stateLock.Lock()
deletedTasks := i.indexTasks
i.indexTasks = make(map[taskKey]*indexTaskInfo)
i.stateLock.Unlock()
deleted := make([]*indexTaskInfo, 0, len(deletedTasks))
for _, info := range deletedTasks {
deleted = append(deleted, info)
}
return deleted
}
type analyzeTaskInfo struct {
cancel context.CancelFunc
state indexpb.JobState
failReason string
centroidsFile string
}
func (i *IndexNode) loadOrStoreAnalyzeTask(clusterID string, taskID UniqueID, info *analyzeTaskInfo) *analyzeTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
key := taskKey{ClusterID: clusterID, BuildID: taskID}
oldInfo, ok := i.analyzeTasks[key]
if ok {
return oldInfo
}
i.analyzeTasks[key] = info
return nil
}
func (i *IndexNode) loadAnalyzeTaskState(clusterID string, taskID UniqueID) indexpb.JobState {
key := taskKey{ClusterID: clusterID, BuildID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
task, ok := i.analyzeTasks[key]
if !ok {
return indexpb.JobState_JobStateNone
}
return task.state
}
func (i *IndexNode) storeAnalyzeTaskState(clusterID string, taskID UniqueID, state indexpb.JobState, failReason string) {
key := taskKey{ClusterID: clusterID, BuildID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if task, ok := i.analyzeTasks[key]; ok {
log.Info("IndexNode store analyze task state", zap.String("clusterID", clusterID), zap.Int64("taskID", taskID),
zap.String("state", state.String()), zap.String("fail reason", failReason))
task.state = state
task.failReason = failReason
}
}
func (i *IndexNode) foreachAnalyzeTaskInfo(fn func(clusterID string, taskID UniqueID, info *analyzeTaskInfo)) {
i.stateLock.Lock()
defer i.stateLock.Unlock()
for key, info := range i.analyzeTasks {
fn(key.ClusterID, key.BuildID, info)
}
}
func (i *IndexNode) storeAnalyzeFilesAndStatistic(
ClusterID string,
taskID UniqueID,
centroidsFile string,
) {
key := taskKey{ClusterID: ClusterID, BuildID: taskID}
i.stateLock.Lock()
defer i.stateLock.Unlock()
if info, ok := i.analyzeTasks[key]; ok {
info.centroidsFile = centroidsFile
return
}
}
func (i *IndexNode) getAnalyzeTaskInfo(clusterID string, taskID UniqueID) *analyzeTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
return i.analyzeTasks[taskKey{ClusterID: clusterID, BuildID: taskID}]
}
func (i *IndexNode) deleteAnalyzeTaskInfos(ctx context.Context, keys []taskKey) []*analyzeTaskInfo {
i.stateLock.Lock()
defer i.stateLock.Unlock()
deleted := make([]*analyzeTaskInfo, 0, len(keys))
for _, key := range keys {
info, ok := i.analyzeTasks[key]
if ok {
deleted = append(deleted, info)
delete(i.analyzeTasks, key)
log.Ctx(ctx).Info("delete analyze task infos",
zap.String("clusterID", key.ClusterID), zap.Int64("taskID", key.BuildID))
}
}
return deleted
}
func (i *IndexNode) deleteAllAnalyzeTasks() []*analyzeTaskInfo {
i.stateLock.Lock()
deletedTasks := i.analyzeTasks
i.analyzeTasks = make(map[taskKey]*analyzeTaskInfo)
i.stateLock.Unlock()
deleted := make([]*analyzeTaskInfo, 0, len(deletedTasks))
for _, info := range deletedTasks {
deleted = append(deleted, info)
}
return deleted
}
func (i *IndexNode) hasInProgressTask() bool {
i.stateLock.Lock()
defer i.stateLock.Unlock()
for _, info := range i.indexTasks {
if info.state == commonpb.IndexState_InProgress {
return true
}
}
for _, info := range i.analyzeTasks {
if info.state == indexpb.JobState_JobStateInProgress {
return true
}
}
return false
}
func (i *IndexNode) waitTaskFinish() {
if !i.hasInProgressTask() {
return
}
gracefulTimeout := &Params.IndexNodeCfg.GracefulStopTimeout
ticker := time.NewTicker(time.Second)
defer ticker.Stop()
timeoutCtx, cancel := context.WithTimeout(i.loopCtx, gracefulTimeout.GetAsDuration(time.Second))
defer cancel()
for {
select {
case <-ticker.C:
if !i.hasInProgressTask() {
return
}
case <-timeoutCtx.Done():
log.Warn("timeout, the index node has some progress task")
for _, info := range i.indexTasks {
if info.state == commonpb.IndexState_InProgress {
log.Warn("progress task", zap.Any("info", info))
}
}
for _, info := range i.analyzeTasks {
if info.state == indexpb.JobState_JobStateInProgress {
log.Warn("progress task", zap.Any("info", info))
}
}
return
}
}
}