Fix histogram metrics buckets range for index (#26976)

See also: #26743

Signed-off-by: yangxuan <xuan.yang@zilliz.com>
This commit is contained in:
XuanYang-cn 2023-09-13 17:21:18 +08:00 committed by GitHub
parent 4fa878d03c
commit ea2b909c6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 15 additions and 13 deletions

View File

@ -228,7 +228,7 @@ func (it *indexBuildTask) LoadData(ctx context.Context) error {
}
loadFieldDataLatency := it.tr.CtxRecord(ctx, "load field data done")
metrics.IndexNodeLoadFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(loadFieldDataLatency.Milliseconds()))
metrics.IndexNodeLoadFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(loadFieldDataLatency.Seconds())
err = it.decodeBlobs(ctx, blobs)
if err != nil {
@ -349,7 +349,7 @@ func (it *indexBuildTask) BuildIndex(ctx context.Context) error {
}
buildIndexLatency := it.tr.RecordSpan()
metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(buildIndexLatency.Milliseconds()))
metrics.IndexNodeKnowhereBuildIndexLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(buildIndexLatency.Seconds())
log.Ctx(ctx).Info("Successfully build index", zap.Int64("buildID", it.BuildID), zap.Int64("Collection", it.collectionID), zap.Int64("SegmentID", it.segmentID))
return nil
@ -369,7 +369,7 @@ func (it *indexBuildTask) SaveIndexFiles(ctx context.Context) error {
return err
}
encodeIndexFileDur := it.tr.Record("index serialize and upload done")
metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(encodeIndexFileDur.Milliseconds()))
metrics.IndexNodeEncodeIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(encodeIndexFileDur.Seconds())
// early release index for gc, and we can ensure that Delete is idempotent.
gcIndex()
@ -388,7 +388,7 @@ func (it *indexBuildTask) SaveIndexFiles(ctx context.Context) error {
it.node.storeIndexFilesAndStatistic(it.ClusterID, it.BuildID, saveFileKeys, it.serializedSize, &it.statistic)
log.Ctx(ctx).Debug("save index files done", zap.Strings("IndexFiles", saveFileKeys))
saveIndexFileDur := it.tr.RecordSpan()
metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(saveIndexFileDur.Milliseconds()))
metrics.IndexNodeSaveIndexFileLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(saveIndexFileDur.Seconds())
it.tr.Elapse("index building all done")
log.Ctx(ctx).Info("Successfully save index files", zap.Int64("buildID", it.BuildID), zap.Int64("Collection", it.collectionID),
zap.Int64("partition", it.partitionID), zap.Int64("SegmentId", it.segmentID))
@ -435,8 +435,7 @@ func (it *indexBuildTask) decodeBlobs(ctx context.Context, blobs []*storage.Blob
if err2 != nil {
return err2
}
decodeDuration := it.tr.RecordSpan().Milliseconds()
metrics.IndexNodeDecodeFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(float64(decodeDuration))
metrics.IndexNodeDecodeFieldLatency.WithLabelValues(strconv.FormatInt(paramtable.GetNodeID(), 10)).Observe(it.tr.RecordSpan().Seconds())
if len(insertData.Data) != 1 {
return errors.New("we expect only one field in deserialized insert data")

View File

@ -233,7 +233,7 @@ func (sched *TaskScheduler) processTask(t task, q TaskQueue) {
}
t.SetState(commonpb.IndexState_Finished, "")
if indexBuildTask, ok := t.(*indexBuildTask); ok {
metrics.IndexNodeBuildIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(indexBuildTask.tr.ElapseSpan().Milliseconds()))
metrics.IndexNodeBuildIndexLatency.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(indexBuildTask.tr.ElapseSpan().Seconds())
metrics.IndexNodeIndexTaskLatencyInQueue.WithLabelValues(fmt.Sprint(paramtable.GetNodeID())).Observe(float64(indexBuildTask.queueDur.Milliseconds()))
}
}

View File

@ -23,6 +23,9 @@ import (
)
var (
// unit second, from 1ms to ~2.8hrs (largest bucket is 10000s)
indexBucket = []float64{0.001, 0.1, 0.5, 1, 5, 10, 20, 50, 100, 250, 500, 1000, 3600, 5000, 10000}
IndexNodeBuildIndexTaskCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: milvusNamespace,
@ -37,7 +40,7 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "load_field_latency",
Help: "latency of loading the field data",
Buckets: buckets,
Buckets: indexBucket,
}, []string{nodeIDLabelName})
IndexNodeDecodeFieldLatency = prometheus.NewHistogramVec(
@ -46,7 +49,7 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "decode_field_latency",
Help: "latency of decode field data",
Buckets: buckets,
Buckets: indexBucket,
}, []string{nodeIDLabelName})
IndexNodeKnowhereBuildIndexLatency = prometheus.NewHistogramVec(
@ -55,7 +58,7 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "knowhere_build_index_latency",
Help: "latency of building the index by knowhere",
Buckets: buckets,
Buckets: indexBucket,
}, []string{nodeIDLabelName})
IndexNodeEncodeIndexFileLatency = prometheus.NewHistogramVec(
@ -64,7 +67,7 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "encode_index_latency",
Help: "latency of encoding the index file",
Buckets: buckets,
Buckets: indexBucket,
}, []string{nodeIDLabelName})
IndexNodeSaveIndexFileLatency = prometheus.NewHistogramVec(
@ -73,7 +76,7 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "save_index_latency",
Help: "latency of saving the index file",
Buckets: buckets,
Buckets: indexBucket,
}, []string{nodeIDLabelName})
IndexNodeIndexTaskLatencyInQueue = prometheus.NewHistogramVec(
@ -91,7 +94,7 @@ var (
Subsystem: typeutil.IndexNodeRole,
Name: "build_index_latency",
Help: "latency of build index for segment",
Buckets: buckets,
Buckets: indexBucket,
}, []string{nodeIDLabelName})
)