2022-03-15 21:51:21 +08:00
|
|
|
// Licensed to the LF AI & Data foundation under one
|
|
|
|
// or more contributor license agreements. See the NOTICE file
|
|
|
|
// distributed with this work for additional information
|
|
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
|
|
// to you under the Apache License, Version 2.0 (the
|
|
|
|
// "License"); you may not use this file except in compliance
|
|
|
|
// with the License. You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package metrics
|
|
|
|
|
|
|
|
import (
|
2023-02-13 10:24:33 +08:00
|
|
|
"fmt"
|
|
|
|
|
2023-09-21 09:45:27 +08:00
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
|
2023-08-17 15:52:17 +08:00
|
|
|
"github.com/milvus-io/milvus-proto/go-api/v2/commonpb"
|
2023-04-06 19:14:32 +08:00
|
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
2022-03-15 21:51:21 +08:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
	// Segment file kinds. They are declared alongside segmentFileTypeLabelName
	// and are presumably the values reported under that label — confirm
	// against the callers that observe the file-count metrics.
	InsertFileLabel = "insert_file"
	DeleteFileLabel = "delete_file"
	StatFileLabel   = "stat_file"
	IndexFileLabel  = "index_file"

	// segmentFileTypeLabelName is the label key used by the metrics in this
	// file that are partitioned by segment file type.
	segmentFileTypeLabelName = "segment_file_type"
)
|
|
|
|
|
|
|
|
var (
|
2023-09-21 09:45:27 +08:00
|
|
|
// DataCoordNumDataNodes records the num of data nodes managed by DataCoord.
|
2022-03-15 21:51:21 +08:00
|
|
|
DataCoordNumDataNodes = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2022-04-27 23:03:47 +08:00
|
|
|
Name: "datanode_num",
|
|
|
|
Help: "number of data nodes",
|
2022-03-15 21:51:21 +08:00
|
|
|
}, []string{})
|
|
|
|
|
|
|
|
DataCoordNumSegments = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2022-04-27 23:03:47 +08:00
|
|
|
Name: "segment_num",
|
2022-03-15 21:51:21 +08:00
|
|
|
Help: "number of segments",
|
|
|
|
}, []string{
|
2022-04-27 23:03:47 +08:00
|
|
|
segmentStateLabelName,
|
2023-11-24 15:58:24 +08:00
|
|
|
segmentLevelLabelName,
|
2022-03-15 21:51:21 +08:00
|
|
|
})
|
|
|
|
|
2023-09-21 09:45:27 +08:00
|
|
|
// DataCoordCollectionNum records the num of collections managed by DataCoord.
|
2022-03-15 21:51:21 +08:00
|
|
|
DataCoordNumCollections = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2022-04-27 23:03:47 +08:00
|
|
|
Name: "collection_num",
|
|
|
|
Help: "number of collections",
|
2022-03-15 21:51:21 +08:00
|
|
|
}, []string{})
|
|
|
|
|
2023-11-24 15:58:24 +08:00
|
|
|
DataCoordSizeStoredL0Segment = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2023-12-20 11:20:44 +08:00
|
|
|
Name: "store_level0_segment_size",
|
2023-11-24 15:58:24 +08:00
|
|
|
Help: "stored l0 segment size",
|
2023-12-20 11:20:44 +08:00
|
|
|
Buckets: buckets,
|
|
|
|
}, []string{
|
|
|
|
collectionIDLabelName,
|
|
|
|
})
|
2023-11-24 15:58:24 +08:00
|
|
|
|
|
|
|
DataCoordRateStoredL0Segment = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2023-12-20 11:20:44 +08:00
|
|
|
Name: "store_level0_segment_rate",
|
2023-11-24 15:58:24 +08:00
|
|
|
Help: "stored l0 segment rate",
|
|
|
|
}, []string{})
|
|
|
|
|
2022-03-15 21:51:21 +08:00
|
|
|
DataCoordNumStoredRows = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2022-04-27 23:03:47 +08:00
|
|
|
Name: "stored_rows_num",
|
2023-08-17 15:52:17 +08:00
|
|
|
Help: "number of stored rows of healthy segment",
|
|
|
|
}, []string{
|
|
|
|
collectionIDLabelName,
|
|
|
|
segmentStateLabelName,
|
|
|
|
})
|
2022-03-15 21:51:21 +08:00
|
|
|
|
2022-04-29 15:35:47 +08:00
|
|
|
DataCoordNumStoredRowsCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "stored_rows_count",
|
|
|
|
Help: "count of all stored rows ever",
|
|
|
|
}, []string{})
|
|
|
|
|
2022-11-07 10:15:02 +08:00
|
|
|
DataCoordConsumeDataNodeTimeTickLag = prometheus.NewGaugeVec(
|
2022-03-15 21:51:21 +08:00
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2022-11-07 10:15:02 +08:00
|
|
|
Name: "consume_datanode_tt_lag_ms",
|
|
|
|
Help: "now time minus tt per physical channel",
|
|
|
|
}, []string{
|
|
|
|
nodeIDLabelName,
|
|
|
|
channelNameLabelName,
|
|
|
|
})
|
2022-03-15 21:51:21 +08:00
|
|
|
|
2023-12-20 11:04:45 +08:00
|
|
|
DataCoordCheckpointUnixSeconds = prometheus.NewGaugeVec(
|
2023-11-28 07:00:28 +08:00
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
2023-12-20 11:04:45 +08:00
|
|
|
Name: "channel_checkpoint_unix_seconds",
|
|
|
|
Help: "channel checkpoint timestamp in unix seconds",
|
2023-11-28 07:00:28 +08:00
|
|
|
}, []string{
|
|
|
|
nodeIDLabelName,
|
|
|
|
channelNameLabelName,
|
|
|
|
})
|
|
|
|
|
2022-10-19 15:13:26 +08:00
|
|
|
DataCoordStoredBinlogSize = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "stored_binlog_size",
|
2023-05-31 16:45:29 +08:00
|
|
|
Help: "binlog size of healthy segments",
|
2023-05-07 20:52:38 +08:00
|
|
|
}, []string{
|
|
|
|
collectionIDLabelName,
|
|
|
|
segmentIDLabelName,
|
|
|
|
})
|
2022-10-19 15:13:26 +08:00
|
|
|
|
2023-02-13 10:24:33 +08:00
|
|
|
DataCoordSegmentBinLogFileCount = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "segment_binlog_file_count",
|
|
|
|
Help: "number of binlog files for each segment",
|
|
|
|
}, []string{
|
|
|
|
collectionIDLabelName,
|
|
|
|
segmentIDLabelName,
|
|
|
|
})
|
|
|
|
|
2023-06-26 17:52:44 +08:00
|
|
|
DataCoordDmlChannelNum = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "watched_dml_chanel_num",
|
|
|
|
Help: "the num of dml channel watched by datanode",
|
|
|
|
}, []string{
|
|
|
|
nodeIDLabelName,
|
|
|
|
})
|
|
|
|
|
|
|
|
DataCoordCompactedSegmentSize = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "compacted_segment_size",
|
|
|
|
Help: "the segment size of compacted segment",
|
|
|
|
Buckets: buckets,
|
|
|
|
}, []string{})
|
|
|
|
|
2023-12-28 15:46:55 +08:00
|
|
|
DataCoordCompactionTaskNum = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "compaction_task_num",
|
|
|
|
Help: "Number of compaction tasks currently",
|
|
|
|
}, []string{
|
|
|
|
nodeIDLabelName,
|
|
|
|
compactionTypeLabelName,
|
|
|
|
statusLabelName,
|
|
|
|
})
|
|
|
|
|
2023-06-26 17:52:44 +08:00
|
|
|
FlushedSegmentFileNum = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Name: "flushed_segment_file_num",
|
|
|
|
Help: "the num of files for flushed segment",
|
|
|
|
Buckets: buckets,
|
|
|
|
}, []string{segmentFileTypeLabelName})
|
|
|
|
|
2023-09-22 18:47:25 +08:00
|
|
|
/* garbage collector related metrics */
|
|
|
|
|
|
|
|
// GarbageCollectorListLatency metrics for gc scan storage files.
|
|
|
|
GarbageCollectorListLatency = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "gc_list_latency",
|
|
|
|
Help: "latency of list objects in storage while garbage collecting (in milliseconds)",
|
|
|
|
Buckets: longTaskBuckets,
|
|
|
|
}, []string{nodeIDLabelName, segmentFileTypeLabelName})
|
|
|
|
|
|
|
|
GarbageCollectorRunCount = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "gc_run_count",
|
|
|
|
Help: "garbage collection running count",
|
|
|
|
}, []string{nodeIDLabelName})
|
|
|
|
|
2022-03-15 21:51:21 +08:00
|
|
|
/* hard to implement, commented now
|
|
|
|
DataCoordSegmentSizeRatio = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "segment_size_ratio",
|
|
|
|
Help: "size ratio compared to the configuration size",
|
|
|
|
Buckets: prometheus.LinearBuckets(0.0, 0.1, 15),
|
|
|
|
}, []string{})
|
|
|
|
|
|
|
|
DataCoordSegmentFlushDuration = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "segment_flush_duration",
|
|
|
|
Help: "time spent on each segment flush",
|
|
|
|
Buckets: []float64{0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}, // unit seconds
|
|
|
|
}, []string{})
|
|
|
|
|
|
|
|
DataCoordCompactDuration = prometheus.NewHistogramVec(
|
|
|
|
prometheus.HistogramOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "segment_compact_duration",
|
|
|
|
Help: "time spent on each segment flush",
|
|
|
|
Buckets: []float64{0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}, // unit seconds
|
2023-12-28 15:46:55 +08:00
|
|
|
}, []string{})
|
2022-03-15 21:51:21 +08:00
|
|
|
|
|
|
|
DataCoordCompactLoad = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "compaction_load",
|
|
|
|
Help: "Information on the input and output of compaction",
|
2023-12-28 15:46:55 +08:00
|
|
|
}, []string{})
|
2022-03-15 21:51:21 +08:00
|
|
|
|
|
|
|
*/
|
|
|
|
|
2023-01-06 14:21:37 +08:00
|
|
|
// IndexRequestCounter records the number of the index requests.
|
|
|
|
IndexRequestCounter = prometheus.NewCounterVec(
|
|
|
|
prometheus.CounterOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "index_req_count",
|
|
|
|
Help: "number of building index requests ",
|
|
|
|
}, []string{statusLabelName})
|
|
|
|
|
|
|
|
// IndexTaskNum records the number of index tasks of each type.
|
|
|
|
IndexTaskNum = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "index_task_count",
|
|
|
|
Help: "number of index tasks of each type",
|
|
|
|
}, []string{collectionIDLabelName, indexTaskStatusLabelName})
|
|
|
|
|
|
|
|
// IndexNodeNum records the number of IndexNodes managed by IndexCoord.
|
|
|
|
IndexNodeNum = prometheus.NewGaugeVec(
|
|
|
|
prometheus.GaugeOpts{
|
|
|
|
Namespace: milvusNamespace,
|
|
|
|
Subsystem: typeutil.DataCoordRole,
|
|
|
|
Name: "index_node_num",
|
|
|
|
Help: "number of IndexNodes managed by IndexCoord",
|
|
|
|
}, []string{})
|
2022-03-15 21:51:21 +08:00
|
|
|
)
|
|
|
|
|
2022-11-22 19:21:13 +08:00
|
|
|
// RegisterDataCoord registers DataCoord metrics
|
2022-03-18 15:51:22 +08:00
|
|
|
func RegisterDataCoord(registry *prometheus.Registry) {
|
|
|
|
registry.MustRegister(DataCoordNumDataNodes)
|
|
|
|
registry.MustRegister(DataCoordNumSegments)
|
|
|
|
registry.MustRegister(DataCoordNumCollections)
|
|
|
|
registry.MustRegister(DataCoordNumStoredRows)
|
2022-06-30 17:26:19 +08:00
|
|
|
registry.MustRegister(DataCoordNumStoredRowsCounter)
|
2022-11-07 10:15:02 +08:00
|
|
|
registry.MustRegister(DataCoordConsumeDataNodeTimeTickLag)
|
2023-12-20 11:04:45 +08:00
|
|
|
registry.MustRegister(DataCoordCheckpointUnixSeconds)
|
2022-10-19 15:13:26 +08:00
|
|
|
registry.MustRegister(DataCoordStoredBinlogSize)
|
2023-02-13 10:24:33 +08:00
|
|
|
registry.MustRegister(DataCoordSegmentBinLogFileCount)
|
2023-06-26 17:52:44 +08:00
|
|
|
registry.MustRegister(DataCoordDmlChannelNum)
|
|
|
|
registry.MustRegister(DataCoordCompactedSegmentSize)
|
2023-11-24 15:58:24 +08:00
|
|
|
registry.MustRegister(DataCoordSizeStoredL0Segment)
|
|
|
|
registry.MustRegister(DataCoordRateStoredL0Segment)
|
2023-06-26 17:52:44 +08:00
|
|
|
registry.MustRegister(FlushedSegmentFileNum)
|
2023-01-06 14:21:37 +08:00
|
|
|
registry.MustRegister(IndexRequestCounter)
|
|
|
|
registry.MustRegister(IndexTaskNum)
|
|
|
|
registry.MustRegister(IndexNodeNum)
|
2022-03-15 21:51:21 +08:00
|
|
|
}
|
2023-02-13 10:24:33 +08:00
|
|
|
|
|
|
|
func CleanupDataCoordSegmentMetrics(collectionID int64, segmentID int64) {
|
|
|
|
DataCoordSegmentBinLogFileCount.
|
|
|
|
Delete(
|
|
|
|
prometheus.Labels{
|
|
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
|
|
segmentIDLabelName: fmt.Sprint(segmentID),
|
|
|
|
})
|
2023-05-07 20:52:38 +08:00
|
|
|
DataCoordStoredBinlogSize.Delete(prometheus.Labels{
|
|
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
|
|
segmentIDLabelName: fmt.Sprint(segmentID),
|
|
|
|
})
|
2023-02-13 10:24:33 +08:00
|
|
|
}
|
2023-08-17 15:52:17 +08:00
|
|
|
|
|
|
|
func CleanupDataCoordNumStoredRows(collectionID int64) {
|
|
|
|
for _, state := range commonpb.SegmentState_name {
|
|
|
|
DataCoordNumStoredRows.Delete(prometheus.Labels{
|
|
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
|
|
segmentStateLabelName: fmt.Sprint(state),
|
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|