Add prometheus metrics for IndexCoord (#15638)

Signed-off-by: cai.zhang <cai.zhang@zilliz.com>
This commit is contained in:
cai.zhang 2022-02-21 17:15:51 +08:00 committed by GitHub
parent 7a78254628
commit b435c422c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 93 additions and 5 deletions

View File

@ -27,6 +27,8 @@ import (
"syscall"
"time"
"github.com/milvus-io/milvus/internal/metrics"
"go.etcd.io/etcd/api/v3/mvccpb"
"go.uber.org/zap"
@ -392,6 +394,7 @@ func (i *IndexCoord) BuildIndex(ctx context.Context, req *indexpb.BuildIndexRequ
},
}, err
}
metrics.IndexCoordIndexRequestCounter.WithLabelValues(metrics.TotalLabel).Inc()
log.Debug("IndexCoord building index ...",
zap.Int64("IndexBuildID", req.IndexBuildID),
zap.String("IndexName = ", req.IndexName),
@ -445,6 +448,7 @@ func (i *IndexCoord) BuildIndex(ctx context.Context, req *indexpb.BuildIndexRequ
if err != nil {
ret.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
ret.Status.Reason = err.Error()
metrics.IndexCoordIndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc()
return ret, nil
}
log.Debug("IndexCoord BuildIndex Enqueue successfully", zap.Int64("IndexBuildID", t.indexBuildID))
@ -454,11 +458,13 @@ func (i *IndexCoord) BuildIndex(ctx context.Context, req *indexpb.BuildIndexRequ
log.Error("IndexCoord scheduler index task failed", zap.Int64("IndexBuildID", t.indexBuildID))
ret.Status.ErrorCode = commonpb.ErrorCode_UnexpectedError
ret.Status.Reason = err.Error()
metrics.IndexCoordIndexRequestCounter.WithLabelValues(metrics.FailLabel).Inc()
return ret, nil
}
sp.SetTag("IndexCoord-IndexBuildID", strconv.FormatInt(t.indexBuildID, 10))
ret.Status.ErrorCode = commonpb.ErrorCode_Success
ret.IndexBuildID = t.indexBuildID
metrics.IndexCoordIndexRequestCounter.WithLabelValues(metrics.SuccessLabel).Inc()
return ret, nil
}
@ -736,6 +742,7 @@ func (i *IndexCoord) recycleUnusedIndexFiles() {
log.Debug("IndexCoord recycleUnusedIndexFiles",
zap.Int64("Recycle the low version index files successfully of the index with indexBuildID", meta.indexMeta.IndexBuildID))
}
metrics.IndexCoordIndexTaskCounter.WithLabelValues(metrics.RecycledIndexTaskLabel).Inc()
}
}
}
@ -823,6 +830,13 @@ func (i *IndexCoord) watchMetaLoop() {
zap.Int64("Finish by IndexNode", indexMeta.NodeID),
zap.Int64("The version of the task", indexMeta.Version))
i.nodeManager.pq.IncPriority(indexMeta.NodeID, -1)
metrics.IndexCoordIndexTaskCounter.WithLabelValues(metrics.InProgressIndexTaskLabel).Dec()
if indexMeta.State == commonpb.IndexState_Finished {
metrics.IndexCoordIndexTaskCounter.WithLabelValues(metrics.FinishedIndexTaskLabel).Inc()
}
if indexMeta.State == commonpb.IndexState_Failed {
metrics.IndexCoordIndexTaskCounter.WithLabelValues(metrics.FailedIndexTaskLabel).Inc()
}
}
case mvccpb.DELETE:
log.Debug("IndexCoord watchMetaLoop DELETE", zap.Int64("The meta has been deleted of indexBuildID", indexBuildID))

View File

@ -23,6 +23,8 @@ import (
"strconv"
"sync"
"github.com/milvus-io/milvus/internal/metrics"
"go.uber.org/zap"
"github.com/golang/protobuf/proto"
@ -159,6 +161,7 @@ func (mt *metaTable) AddIndex(indexBuildID UniqueID, req *indexpb.BuildIndexRequ
},
revision: 0,
}
metrics.IndexCoordIndexTaskCounter.WithLabelValues(metrics.UnissuedIndexTaskLabel).Inc()
return mt.saveIndexMeta(meta)
}
@ -185,6 +188,8 @@ func (mt *metaTable) BuildIndex(indexBuildID UniqueID, nodeID int64) error {
}
meta.indexMeta.NodeID = nodeID
meta.indexMeta.State = commonpb.IndexState_InProgress
metrics.IndexCoordIndexTaskCounter.WithLabelValues(metrics.UnissuedIndexTaskLabel).Dec()
metrics.IndexCoordIndexTaskCounter.WithLabelValues(metrics.InProgressIndexTaskLabel).Inc()
err := mt.saveIndexMeta(&meta)
if err != nil {

View File

@ -21,6 +21,8 @@ import (
"sync"
"time"
"github.com/milvus-io/milvus/internal/metrics"
"go.uber.org/zap"
grpcindexnodeclient "github.com/milvus-io/milvus/internal/distributed/indexnode/client"
@ -74,6 +76,7 @@ func (nm *NodeManager) RemoveNode(nodeID UniqueID) {
delete(nm.nodeClients, nodeID)
nm.lock.Unlock()
nm.pq.Remove(nodeID)
metrics.IndexCoordIndexNodeNum.WithLabelValues("index_node_num").Dec()
}
// AddNode adds the client of IndexNode.
@ -94,6 +97,7 @@ func (nm *NodeManager) AddNode(nodeID UniqueID, address string) error {
log.Error("IndexCoord NodeManager", zap.Any("Add node err", err))
return err
}
metrics.IndexCoordIndexNodeNum.WithLabelValues("index_node_num").Inc()
return nm.setClient(nodeID, nodeClient)
}

View File

@ -0,0 +1,70 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"github.com/milvus-io/milvus/internal/util/typeutil"
"github.com/prometheus/client_golang/prometheus"
)
// Label values used when recording IndexCoord metrics.
const (
// SuccessLabel, FailLabel and TotalLabel are used as values of the "status"
// label on IndexCoordIndexRequestCounter.
SuccessLabel = "success"
FailLabel = "fail"
TotalLabel = "total"
// The remaining constants are used as values of the "type" label on
// IndexCoordIndexTaskCounter, one per index-task state being tracked.
UnissuedIndexTaskLabel = "unissued"
InProgressIndexTaskLabel = "in-progress"
FinishedIndexTaskLabel = "finished"
FailedIndexTaskLabel = "failed"
RecycledIndexTaskLabel = "recycled"
)
// Prometheus collectors exposed by IndexCoord. All of them share the
// milvusNamespace namespace and the typeutil.IndexCoordRole subsystem.
var (
// IndexCoordIndexRequestCounter counts the requests to build an index.
// It is a counter vector partitioned by the "status" label.
IndexCoordIndexRequestCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.IndexCoordRole,
Name: "index_req_counter",
Help: "The number of requests to build index",
}, []string{"status"})
// IndexCoordIndexTaskCounter records the number of index tasks of each type.
// Despite the "counter" in its name it is a gauge vector (partitioned by the
// "type" label), so a series can be decremented as well as incremented.
IndexCoordIndexTaskCounter = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.IndexCoordRole,
Name: "index_task_counter",
Help: "The number of index tasks of each type",
}, []string{"type"})
// IndexCoordIndexNodeNum records the number of IndexNodes managed by IndexCoord.
// It is a gauge vector partitioned by the "type" label.
IndexCoordIndexNodeNum = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: milvusNamespace,
Subsystem: typeutil.IndexCoordRole,
Name: "index_node_num",
Help: "The number of IndexNodes managed by IndexCoord",
}, []string{"type"})
)
// RegisterIndexCoord registers the IndexCoord collectors (request counter,
// task gauge and node-count gauge) via prometheus.MustRegister, which panics
// if any registration fails.
func RegisterIndexCoord() {
	prometheus.MustRegister(
		IndexCoordIndexRequestCounter,
		IndexCoordIndexTaskCounter,
		IndexCoordIndexNodeNum,
	)
}

View File

@ -640,11 +640,6 @@ func RegisterDataNode() {
prometheus.MustRegister(DataNodeWatchDmChannelsCounter)
}
// RegisterIndexCoord is the registration hook for IndexCoord metrics.
// The body is empty here, so calling it registers nothing.
func RegisterIndexCoord() {
}
//RegisterIndexNode registers IndexNode metrics
func RegisterIndexNode() {