From 5e5787ddd2d80c22961e2131cf2dc8b59b7aa230 Mon Sep 17 00:00:00 2001 From: bigsheeper Date: Thu, 16 Dec 2021 17:25:13 +0800 Subject: [PATCH] Update memory util in metricinfo (#13320) Signed-off-by: bigsheeper --- internal/querynode/metrics_info.go | 24 +--------- internal/querynode/param_table.go | 14 +----- internal/querynode/segment_loader.go | 13 +++--- internal/util/metricsinfo/container_linux.go | 12 ++--- internal/util/metricsinfo/container_test.go | 6 +-- .../util/metricsinfo/container_windows.go | 12 ++--- internal/util/metricsinfo/hardware_info.go | 46 ++++++++++++++++++- 7 files changed, 68 insertions(+), 59 deletions(-) diff --git a/internal/querynode/metrics_info.go b/internal/querynode/metrics_info.go index 179251e398..2b770f8492 100644 --- a/internal/querynode/metrics_info.go +++ b/internal/querynode/metrics_info.go @@ -26,14 +26,8 @@ import ( ) func getSystemInfoMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest, node *QueryNode) (*milvuspb.GetMetricsResponse, error) { - usedMem, err := getUsedMemory() - if err != nil { - return nil, err - } - totalMem, err := getTotalMemory() - if err != nil { - return nil, err - } + usedMem := metricsinfo.GetUsedMemoryCount() + totalMem := metricsinfo.GetMemoryCount() nodeInfos := metricsinfo.QueryNodeInfos{ BaseComponentInfos: metricsinfo.BaseComponentInfos{ Name: metricsinfo.ConstructComponentName(typeutil.QueryNodeRole, Params.QueryNodeID), @@ -86,17 +80,3 @@ func getSystemInfoMetrics(ctx context.Context, req *milvuspb.GetMetricsRequest, ComponentName: metricsinfo.ConstructComponentName(typeutil.QueryNodeRole, Params.QueryNodeID), }, nil } - -func getUsedMemory() (uint64, error) { - if Params.InContainer { - return metricsinfo.GetContainerMemUsed() - } - return metricsinfo.GetUsedMemoryCount(), nil -} - -func getTotalMemory() (uint64, error) { - if Params.InContainer { - return metricsinfo.GetContainerMemLimit() - } - return metricsinfo.GetMemoryCount(), nil -} diff --git a/internal/querynode/param_table.go b/internal/querynode/param_table.go index bc4f895b8b..b0d728e480 100644 --- a/internal/querynode/param_table.go +++ b/internal/querynode/param_table.go @@ -26,7 +26,6 @@ import ( "go.uber.org/zap" "github.com/milvus-io/milvus/internal/log" - "github.com/milvus-io/milvus/internal/util/metricsinfo" "github.com/milvus-io/milvus/internal/util/paramtable" ) @@ -44,8 +43,7 @@ type ParamTable struct { QueryNodePort int64 QueryNodeID UniqueID // TODO: remove cacheSize - CacheSize int64 // deprecated - InContainer bool + CacheSize int64 // deprecated // channel prefix ClusterChannelPrefix string @@ -118,7 +116,6 @@ func (p *ParamTable) Init() { p.BaseTable.Init() p.initCacheSize() - p.initInContainer() p.initMinioEndPoint() p.initMinioAccessKeyID() @@ -178,15 +175,6 @@ func (p *ParamTable) initCacheSize() { p.CacheSize = value } -func (p *ParamTable) initInContainer() { - var err error - p.InContainer, err = metricsinfo.InContainer() - if err != nil { - panic(err) - } - log.Debug("init InContainer", zap.Any("is query node running inside a container? :", p.InContainer)) -} - // ---------------------------------------------------------- minio func (p *ParamTable) initMinioEndPoint() { url, err := p.Load("_MinioAddress") diff --git a/internal/querynode/segment_loader.go b/internal/querynode/segment_loader.go index 63c9c80bfd..026ba56681 100644 --- a/internal/querynode/segment_loader.go +++ b/internal/querynode/segment_loader.go @@ -41,6 +41,7 @@ import ( "github.com/milvus-io/milvus/internal/storage" "github.com/milvus-io/milvus/internal/types" "github.com/milvus-io/milvus/internal/util/funcutil" + "github.com/milvus-io/milvus/internal/util/metricsinfo" ) const timeoutForEachRead = 10 * time.Second @@ -615,13 +616,11 @@ func (loader *segmentLoader) estimateSegmentSize(segment *Segment, } func (loader *segmentLoader) checkSegmentSize(collectionID UniqueID, segmentSizes map[UniqueID]int64) error { - usedMem, err := getUsedMemory() - if err != nil { - return err - } - totalMem, err := getTotalMemory() - if err != nil { - return err + usedMem := metricsinfo.GetUsedMemoryCount() + totalMem := metricsinfo.GetMemoryCount() + + if usedMem == 0 || totalMem == 0 { + return errors.New(fmt.Sprintln("get memory failed when checkSegmentSize, collectionID = ", collectionID)) } segmentTotalSize := int64(0) diff --git a/internal/util/metricsinfo/container_linux.go b/internal/util/metricsinfo/container_linux.go index 422bcafa73..0c17059d55 100644 --- a/internal/util/metricsinfo/container_linux.go +++ b/internal/util/metricsinfo/container_linux.go @@ -18,8 +18,8 @@ import ( "github.com/containerd/cgroups" ) -// InContainer checks if the service is running inside a container. -func InContainer() (bool, error) { +// inContainer checks if the service is running inside a container. +func inContainer() (bool, error) { paths, err := cgroups.ParseCgroupFile("/proc/1/cgroup") if err != nil { return false, err @@ -28,8 +28,8 @@ func InContainer() (bool, error) { return devicePath != "", nil } -// GetContainerMemLimit returns memory limit and error -func GetContainerMemLimit() (uint64, error) { +// getContainerMemLimit returns memory limit and error +func getContainerMemLimit() (uint64, error) { control, err := cgroups.Load(cgroups.V1, cgroups.RootPath) if err != nil { return 0, err @@ -44,8 +44,8 @@ func GetContainerMemLimit() (uint64, error) { return stats.Memory.Usage.Limit, nil } -// GetContainerMemUsed returns memory usage and error -func GetContainerMemUsed() (uint64, error) { +// getContainerMemUsed returns memory usage and error +func getContainerMemUsed() (uint64, error) { control, err := cgroups.Load(cgroups.V1, cgroups.RootPath) if err != nil { return 0, err diff --git a/internal/util/metricsinfo/container_test.go b/internal/util/metricsinfo/container_test.go index e1d22083c0..efec4a7518 100644 --- a/internal/util/metricsinfo/container_test.go +++ b/internal/util/metricsinfo/container_test.go @@ -18,19 +18,19 @@ import ( ) func TestInContainer(t *testing.T) { - _, err := InContainer() + _, err := inContainer() assert.NoError(t, err) } func TestGetContainerMemLimit(t *testing.T) { - limit, err := GetContainerMemLimit() + limit, err := getContainerMemLimit() assert.NoError(t, err) assert.True(t, limit > 0) t.Log("limit memory:", limit) } func TestGetContainerMemUsed(t *testing.T) { - used, err := GetContainerMemUsed() + used, err := getContainerMemUsed() assert.NoError(t, err) assert.True(t, used > 0) t.Log("used memory:", used) diff --git a/internal/util/metricsinfo/container_windows.go b/internal/util/metricsinfo/container_windows.go index 64937acce4..18c32304e6 100644 --- a/internal/util/metricsinfo/container_windows.go +++ b/internal/util/metricsinfo/container_windows.go @@ -15,18 +15,18 @@ import ( "errors" ) -// IfServiceInContainer checks if the service is running inside a container +// inContainer checks if the service is running inside a container // It should be always false while under windows. -func InContainer() (bool, error) { +func inContainer() (bool, error) { return false, nil } -// GetContainerMemLimit returns memory limit and error -func GetContainerMemLimit() (uint64, error) { +// getContainerMemLimit returns memory limit and error +func getContainerMemLimit() (uint64, error) { return 0, errors.New("Not supported") } -// GetContainerMemUsed returns memory usage and error -func GetContainerMemUsed() (uint64, error) { +// getContainerMemUsed returns memory usage and error +func getContainerMemUsed() (uint64, error) { return 0, errors.New("Not supported") } diff --git a/internal/util/metricsinfo/hardware_info.go b/internal/util/metricsinfo/hardware_info.go index bc56d64897..23b7d24778 100644 --- a/internal/util/metricsinfo/hardware_info.go +++ b/internal/util/metricsinfo/hardware_info.go @@ -12,10 +12,19 @@ package metricsinfo import ( - "github.com/milvus-io/milvus/internal/log" + "sync" + "github.com/shirou/gopsutil/cpu" "github.com/shirou/gopsutil/mem" "go.uber.org/zap" + + "github.com/milvus-io/milvus/internal/log" +) + +var ( + icOnce sync.Once + ic bool + icErr error ) // GetCPUCoreCount returns the count of cpu core. @@ -50,18 +59,51 @@ func GetCPUUsage() float64 { // GetMemoryCount returns the memory count in bytes. func GetMemoryCount() uint64 { + icOnce.Do(func() { + ic, icErr = inContainer() + }) + if icErr != nil { + log.Error(icErr.Error()) + return 0 + } + if ic { + // in container, calculate by `cgroups` + limit, err := getContainerMemLimit() + if err != nil { + log.Error(err.Error()) + return 0 + } + return limit + } + // not in container, calculate by `gopsutil` stats, err := mem.VirtualMemory() if err != nil { log.Warn("failed to get memory count", zap.Error(err)) return 0 } - return stats.Total } // GetUsedMemoryCount returns the memory usage in bytes. func GetUsedMemoryCount() uint64 { + icOnce.Do(func() { + ic, icErr = inContainer() + }) + if icErr != nil { + log.Error(icErr.Error()) + return 0 + } + if ic { + // in container, calculate by `cgroups` + used, err := getContainerMemUsed() + if err != nil { + log.Error(err.Error()) + return 0 + } + return used + } + // not in container, calculate by `gopsutil` stats, err := mem.VirtualMemory() if err != nil { log.Warn("failed to get memory usage count",