mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-12-12 13:15:09 +08:00
27cc9f2630
issue: #30633 Signed-off-by: Cai Zhang <cai.zhang@zilliz.com> Co-authored-by: chasingegg <chao.gao@zilliz.com>
183 lines
4.8 KiB
Go
183 lines
4.8 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package datacoord
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync"
|
|
|
|
"github.com/golang/protobuf/proto"
|
|
"go.uber.org/zap"
|
|
|
|
"github.com/milvus-io/milvus/internal/metastore"
|
|
"github.com/milvus-io/milvus/internal/proto/indexpb"
|
|
"github.com/milvus-io/milvus/pkg/log"
|
|
"github.com/milvus-io/milvus/pkg/util/timerecord"
|
|
)
|
|
|
|
type analyzeMeta struct {
|
|
sync.RWMutex
|
|
|
|
ctx context.Context
|
|
catalog metastore.DataCoordCatalog
|
|
|
|
// taskID -> analyzeStats
|
|
// TODO: when to mark as dropped?
|
|
tasks map[int64]*indexpb.AnalyzeTask
|
|
}
|
|
|
|
func newAnalyzeMeta(ctx context.Context, catalog metastore.DataCoordCatalog) (*analyzeMeta, error) {
|
|
mt := &analyzeMeta{
|
|
ctx: ctx,
|
|
catalog: catalog,
|
|
tasks: make(map[int64]*indexpb.AnalyzeTask),
|
|
}
|
|
|
|
if err := mt.reloadFromKV(); err != nil {
|
|
return nil, err
|
|
}
|
|
return mt, nil
|
|
}
|
|
|
|
func (m *analyzeMeta) reloadFromKV() error {
|
|
record := timerecord.NewTimeRecorder("analyzeMeta-reloadFromKV")
|
|
|
|
// load analyze stats
|
|
analyzeTasks, err := m.catalog.ListAnalyzeTasks(m.ctx)
|
|
if err != nil {
|
|
log.Warn("analyzeMeta reloadFromKV load analyze tasks failed", zap.Error(err))
|
|
return err
|
|
}
|
|
|
|
for _, analyzeTask := range analyzeTasks {
|
|
m.tasks[analyzeTask.TaskID] = analyzeTask
|
|
}
|
|
log.Info("analyzeMeta reloadFromKV done", zap.Duration("duration", record.ElapseSpan()))
|
|
return nil
|
|
}
|
|
|
|
func (m *analyzeMeta) saveTask(newTask *indexpb.AnalyzeTask) error {
|
|
if err := m.catalog.SaveAnalyzeTask(m.ctx, newTask); err != nil {
|
|
return err
|
|
}
|
|
m.tasks[newTask.TaskID] = newTask
|
|
return nil
|
|
}
|
|
|
|
func (m *analyzeMeta) GetTask(taskID int64) *indexpb.AnalyzeTask {
|
|
m.RLock()
|
|
defer m.RUnlock()
|
|
|
|
return m.tasks[taskID]
|
|
}
|
|
|
|
func (m *analyzeMeta) AddAnalyzeTask(task *indexpb.AnalyzeTask) error {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
|
|
log.Info("add analyze task", zap.Int64("taskID", task.TaskID),
|
|
zap.Int64("collectionID", task.CollectionID), zap.Int64("partitionID", task.PartitionID))
|
|
return m.saveTask(task)
|
|
}
|
|
|
|
func (m *analyzeMeta) DropAnalyzeTask(taskID int64) error {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
|
|
log.Info("drop analyze task", zap.Int64("taskID", taskID))
|
|
if err := m.catalog.DropAnalyzeTask(m.ctx, taskID); err != nil {
|
|
log.Warn("drop analyze task by catalog failed", zap.Int64("taskID", taskID),
|
|
zap.Error(err))
|
|
return err
|
|
}
|
|
|
|
delete(m.tasks, taskID)
|
|
return nil
|
|
}
|
|
|
|
func (m *analyzeMeta) UpdateVersion(taskID int64) error {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
|
|
t, ok := m.tasks[taskID]
|
|
if !ok {
|
|
return fmt.Errorf("there is no task with taskID: %d", taskID)
|
|
}
|
|
|
|
cloneT := proto.Clone(t).(*indexpb.AnalyzeTask)
|
|
cloneT.Version++
|
|
log.Info("update task version", zap.Int64("taskID", taskID), zap.Int64("newVersion", cloneT.Version))
|
|
return m.saveTask(cloneT)
|
|
}
|
|
|
|
func (m *analyzeMeta) BuildingTask(taskID, nodeID int64) error {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
|
|
t, ok := m.tasks[taskID]
|
|
if !ok {
|
|
return fmt.Errorf("there is no task with taskID: %d", taskID)
|
|
}
|
|
|
|
cloneT := proto.Clone(t).(*indexpb.AnalyzeTask)
|
|
cloneT.NodeID = nodeID
|
|
cloneT.State = indexpb.JobState_JobStateInProgress
|
|
log.Info("task will be building", zap.Int64("taskID", taskID), zap.Int64("nodeID", nodeID))
|
|
|
|
return m.saveTask(cloneT)
|
|
}
|
|
|
|
func (m *analyzeMeta) FinishTask(taskID int64, result *indexpb.AnalyzeResult) error {
|
|
m.Lock()
|
|
defer m.Unlock()
|
|
|
|
t, ok := m.tasks[taskID]
|
|
if !ok {
|
|
return fmt.Errorf("there is no task with taskID: %d", taskID)
|
|
}
|
|
|
|
log.Info("finish task meta...", zap.Int64("taskID", taskID), zap.String("state", result.GetState().String()),
|
|
zap.String("failReason", result.GetFailReason()))
|
|
|
|
cloneT := proto.Clone(t).(*indexpb.AnalyzeTask)
|
|
cloneT.State = result.GetState()
|
|
cloneT.FailReason = result.GetFailReason()
|
|
cloneT.CentroidsFile = result.GetCentroidsFile()
|
|
return m.saveTask(cloneT)
|
|
}
|
|
|
|
func (m *analyzeMeta) GetAllTasks() map[int64]*indexpb.AnalyzeTask {
|
|
m.RLock()
|
|
defer m.RUnlock()
|
|
|
|
return m.tasks
|
|
}
|
|
|
|
func (m *analyzeMeta) CheckCleanAnalyzeTask(taskID UniqueID) (bool, *indexpb.AnalyzeTask) {
|
|
m.RLock()
|
|
defer m.RUnlock()
|
|
|
|
if t, ok := m.tasks[taskID]; ok {
|
|
if t.State == indexpb.JobState_JobStateFinished {
|
|
return true, t
|
|
}
|
|
return false, t
|
|
}
|
|
return true, nil
|
|
}
|