mirror of
https://gitee.com/milvus-io/milvus.git
synced 2024-11-30 02:48:45 +08:00
a1232fafda
#30633 Signed-off-by: wayblink <anyang.wang@zilliz.com> Co-authored-by: MrPresent-Han <chun.han@zilliz.com>
380 lines
12 KiB
Go
380 lines
12 KiB
Go
// Licensed to the LF AI & Data foundation under one
|
|
// or more contributor license agreements. See the NOTICE file
|
|
// distributed with this work for additional information
|
|
// regarding copyright ownership. The ASF licenses this file
|
|
// to you under the Apache License, Version 2.0 (the
|
|
// "License"); you may not use this file except in compliance
|
|
// with the License. You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package metrics
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
"github.com/prometheus/client_golang/prometheus"
|
|
|
|
"github.com/milvus-io/milvus/pkg/util/typeutil"
|
|
)
|
|
|
|
const (
	// Values used for the segmentFileTypeLabelName label, one per kind of
	// segment file.
	InsertFileLabel = "insert_file"
	DeleteFileLabel = "delete_file"
	StatFileLabel   = "stat_file"
	IndexFileLabel  = "index_file"

	// segmentFileTypeLabelName is the label key under which one of the
	// *FileLabel values above is reported.
	segmentFileTypeLabelName = "segment_file_type"
)
|
|
|
|
var (
|
|
// DataCoordNumDataNodes records the num of data nodes managed by DataCoord.
|
|
DataCoordNumDataNodes = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "datanode_num",
|
|
Help: "number of data nodes",
|
|
}, []string{})
|
|
|
|
DataCoordNumSegments = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "segment_num",
|
|
Help: "number of segments",
|
|
}, []string{
|
|
segmentStateLabelName,
|
|
segmentLevelLabelName,
|
|
})
|
|
|
|
// DataCoordCollectionNum records the num of collections managed by DataCoord.
|
|
DataCoordNumCollections = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "collection_num",
|
|
Help: "number of collections",
|
|
}, []string{})
|
|
|
|
DataCoordSizeStoredL0Segment = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "store_level0_segment_size",
|
|
Help: "stored l0 segment size",
|
|
Buckets: buckets,
|
|
}, []string{
|
|
collectionIDLabelName,
|
|
})
|
|
|
|
DataCoordRateStoredL0Segment = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "store_level0_segment_rate",
|
|
Help: "stored l0 segment rate",
|
|
}, []string{})
|
|
|
|
// DataCoordNumStoredRows all metrics will be cleaned up after removing matched collectionID and
|
|
// segment state labels in CleanupDataCoordNumStoredRows method.
|
|
DataCoordNumStoredRows = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "stored_rows_num",
|
|
Help: "number of stored rows of healthy segment",
|
|
}, []string{
|
|
databaseLabelName,
|
|
collectionIDLabelName,
|
|
segmentStateLabelName,
|
|
})
|
|
|
|
DataCoordBulkVectors = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "bulk_insert_vectors_count",
|
|
Help: "counter of vectors successfully bulk inserted",
|
|
}, []string{
|
|
databaseLabelName,
|
|
collectionIDLabelName,
|
|
})
|
|
|
|
DataCoordConsumeDataNodeTimeTickLag = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "consume_datanode_tt_lag_ms",
|
|
Help: "now time minus tt per physical channel",
|
|
}, []string{
|
|
nodeIDLabelName,
|
|
channelNameLabelName,
|
|
})
|
|
|
|
DataCoordCheckpointUnixSeconds = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "channel_checkpoint_unix_seconds",
|
|
Help: "channel checkpoint timestamp in unix seconds",
|
|
}, []string{
|
|
nodeIDLabelName,
|
|
channelNameLabelName,
|
|
})
|
|
|
|
DataCoordStoredBinlogSize = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "stored_binlog_size",
|
|
Help: "binlog size of healthy segments",
|
|
}, []string{
|
|
databaseLabelName,
|
|
collectionIDLabelName,
|
|
segmentIDLabelName,
|
|
})
|
|
DataCoordSegmentBinLogFileCount = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "segment_binlog_file_count",
|
|
Help: "number of binlog files for each segment",
|
|
}, []string{
|
|
collectionIDLabelName,
|
|
segmentIDLabelName,
|
|
})
|
|
|
|
DataCoordStoredIndexFilesSize = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "stored_index_files_size",
|
|
Help: "index files size of the segments",
|
|
}, []string{
|
|
databaseLabelName,
|
|
collectionIDLabelName,
|
|
segmentIDLabelName,
|
|
})
|
|
|
|
DataCoordDmlChannelNum = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "watched_dml_chanel_num",
|
|
Help: "the num of dml channel watched by datanode",
|
|
}, []string{
|
|
nodeIDLabelName,
|
|
})
|
|
|
|
DataCoordCompactedSegmentSize = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "compacted_segment_size",
|
|
Help: "the segment size of compacted segment",
|
|
Buckets: sizeBuckets,
|
|
}, []string{})
|
|
|
|
DataCoordCompactionTaskNum = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "compaction_task_num",
|
|
Help: "Number of compaction tasks currently",
|
|
}, []string{
|
|
nodeIDLabelName,
|
|
compactionTypeLabelName,
|
|
statusLabelName,
|
|
})
|
|
|
|
DataCoordCompactionLatency = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "compaction_latency",
|
|
Help: "latency of compaction operation",
|
|
Buckets: longTaskBuckets,
|
|
}, []string{
|
|
isVectorFieldLabelName,
|
|
compactionTypeLabelName,
|
|
stageLabelName,
|
|
})
|
|
|
|
FlushedSegmentFileNum = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Name: "flushed_segment_file_num",
|
|
Help: "the num of files for flushed segment",
|
|
Buckets: buckets,
|
|
}, []string{segmentFileTypeLabelName})
|
|
|
|
/* garbage collector related metrics */
|
|
|
|
// GarbageCollectorFileScanDuration metrics for gc scan storage files.
|
|
GarbageCollectorFileScanDuration = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "gc_file_scan_duration",
|
|
Help: "duration of scan file in storage while garbage collecting (in milliseconds)",
|
|
Buckets: longTaskBuckets,
|
|
}, []string{nodeIDLabelName, segmentFileTypeLabelName})
|
|
|
|
GarbageCollectorRunCount = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "gc_run_count",
|
|
Help: "garbage collection running count",
|
|
}, []string{nodeIDLabelName})
|
|
|
|
/* hard to implement, commented now
|
|
DataCoordSegmentSizeRatio = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "segment_size_ratio",
|
|
Help: "size ratio compared to the configuration size",
|
|
Buckets: prometheus.LinearBuckets(0.0, 0.1, 15),
|
|
}, []string{})
|
|
|
|
DataCoordSegmentFlushDuration = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "segment_flush_duration",
|
|
Help: "time spent on each segment flush",
|
|
Buckets: []float64{0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}, // unit seconds
|
|
}, []string{})
|
|
|
|
DataCoordCompactDuration = prometheus.NewHistogramVec(
|
|
prometheus.HistogramOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "segment_compact_duration",
|
|
Help: "time spent on each segment flush",
|
|
Buckets: []float64{0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}, // unit seconds
|
|
}, []string{})
|
|
|
|
DataCoordCompactLoad = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "compaction_load",
|
|
Help: "Information on the input and output of compaction",
|
|
}, []string{})
|
|
|
|
*/
|
|
|
|
// IndexRequestCounter records the number of the index requests.
|
|
IndexRequestCounter = prometheus.NewCounterVec(
|
|
prometheus.CounterOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "index_req_count",
|
|
Help: "number of building index requests ",
|
|
}, []string{statusLabelName})
|
|
|
|
// IndexTaskNum records the number of index tasks of each type.
|
|
IndexTaskNum = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "index_task_count",
|
|
Help: "number of index tasks of each type",
|
|
}, []string{collectionIDLabelName, indexTaskStatusLabelName})
|
|
|
|
// IndexNodeNum records the number of IndexNodes managed by IndexCoord.
|
|
IndexNodeNum = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "index_node_num",
|
|
Help: "number of IndexNodes managed by IndexCoord",
|
|
}, []string{})
|
|
|
|
ImportTasks = prometheus.NewGaugeVec(
|
|
prometheus.GaugeOpts{
|
|
Namespace: milvusNamespace,
|
|
Subsystem: typeutil.DataCoordRole,
|
|
Name: "import_tasks",
|
|
Help: "the import tasks grouping by type and state",
|
|
}, []string{"task_type", "import_state"})
|
|
)
|
|
|
|
// RegisterDataCoord registers DataCoord metrics
|
|
func RegisterDataCoord(registry *prometheus.Registry) {
|
|
registry.MustRegister(DataCoordNumDataNodes)
|
|
registry.MustRegister(DataCoordNumSegments)
|
|
registry.MustRegister(DataCoordNumCollections)
|
|
registry.MustRegister(DataCoordNumStoredRows)
|
|
registry.MustRegister(DataCoordBulkVectors)
|
|
registry.MustRegister(DataCoordConsumeDataNodeTimeTickLag)
|
|
registry.MustRegister(DataCoordCheckpointUnixSeconds)
|
|
registry.MustRegister(DataCoordStoredBinlogSize)
|
|
registry.MustRegister(DataCoordStoredIndexFilesSize)
|
|
registry.MustRegister(DataCoordSegmentBinLogFileCount)
|
|
registry.MustRegister(DataCoordDmlChannelNum)
|
|
registry.MustRegister(DataCoordCompactedSegmentSize)
|
|
registry.MustRegister(DataCoordCompactionTaskNum)
|
|
registry.MustRegister(DataCoordCompactionLatency)
|
|
registry.MustRegister(DataCoordSizeStoredL0Segment)
|
|
registry.MustRegister(DataCoordRateStoredL0Segment)
|
|
registry.MustRegister(FlushedSegmentFileNum)
|
|
registry.MustRegister(IndexRequestCounter)
|
|
registry.MustRegister(IndexTaskNum)
|
|
registry.MustRegister(IndexNodeNum)
|
|
registry.MustRegister(ImportTasks)
|
|
registry.MustRegister(GarbageCollectorFileScanDuration)
|
|
registry.MustRegister(GarbageCollectorRunCount)
|
|
}
|
|
|
|
func CleanupDataCoordSegmentMetrics(dbName string, collectionID int64, segmentID int64) {
|
|
DataCoordSegmentBinLogFileCount.
|
|
Delete(
|
|
prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
segmentIDLabelName: fmt.Sprint(segmentID),
|
|
})
|
|
DataCoordStoredBinlogSize.Delete(prometheus.Labels{
|
|
databaseLabelName: dbName,
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
segmentIDLabelName: fmt.Sprint(segmentID),
|
|
})
|
|
DataCoordStoredIndexFilesSize.Delete(prometheus.Labels{
|
|
databaseLabelName: dbName,
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
segmentIDLabelName: fmt.Sprint(segmentID),
|
|
})
|
|
}
|
|
|
|
func CleanupDataCoordWithCollectionID(collectionID int64) {
|
|
IndexTaskNum.DeletePartialMatch(prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
})
|
|
DataCoordNumStoredRows.DeletePartialMatch(prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
})
|
|
DataCoordBulkVectors.DeletePartialMatch(prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
})
|
|
DataCoordSegmentBinLogFileCount.DeletePartialMatch(prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
})
|
|
DataCoordStoredBinlogSize.DeletePartialMatch(prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
})
|
|
DataCoordStoredIndexFilesSize.DeletePartialMatch(prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
})
|
|
DataCoordSizeStoredL0Segment.Delete(prometheus.Labels{
|
|
collectionIDLabelName: fmt.Sprint(collectionID),
|
|
})
|
|
}
|