diff --git a/cpp/src/metrics/PrometheusMetrics.cpp b/cpp/src/metrics/PrometheusMetrics.cpp index e5db5a9264..8672817428 100644 --- a/cpp/src/metrics/PrometheusMetrics.cpp +++ b/cpp/src/metrics/PrometheusMetrics.cpp @@ -37,6 +37,7 @@ PrometheusMetrics::Init() { } + void PrometheusMetrics::CPUUsagePercentSet() { if(!startup_) return ; @@ -54,8 +55,11 @@ PrometheusMetrics::RAMUsagePercentSet() { void PrometheusMetrics::GPUPercentGaugeSet() { if(!startup_) return; - int numDevide = server::SystemInfo::GetInstance().NumDevice(); + int numDevide = server::SystemInfo::GetInstance().num_device(); std::vector values = server::SystemInfo::GetInstance().GPUPercent(); +// for (int i = 0; i < numDevide; ++i) { +// GPU_percent_gauges_[i].Set(static_cast(values[i])); +// } if(numDevide >= 1) GPU0_percent_gauge_.Set(static_cast(values[0])); if(numDevide >= 2) GPU1_percent_gauge_.Set(static_cast(values[1])); if(numDevide >= 3) GPU2_percent_gauge_.Set(static_cast(values[2])); @@ -70,9 +74,13 @@ PrometheusMetrics::GPUPercentGaugeSet() { void PrometheusMetrics::GPUMemoryUsageGaugeSet() { if(!startup_) return; + int numDevide = server::SystemInfo::GetInstance().num_device(); std::vector values = server::SystemInfo::GetInstance().GPUMemoryUsed(); - unsigned long long MtoB = 1024*1024; + constexpr unsigned long long MtoB = 1024*1024; int numDevice = values.size(); +// for (int i = 0; i < numDevice; ++i) { +// GPU_memory_usage_gauges_[i].Set(values[i]/MtoB); +// } if(numDevice >=1) GPU0_memory_usage_gauge_.Set(values[0]/MtoB); if(numDevice >=2) GPU1_memory_usage_gauge_.Set(values[1]/MtoB); if(numDevice >=3) GPU2_memory_usage_gauge_.Set(values[2]/MtoB); @@ -111,6 +119,22 @@ void PrometheusMetrics::ConnectionGaugeDecrement() { connection_gauge_.Decrement(); } +//void PrometheusMetrics::GpuPercentInit() { +// int num_device = SystemInfo::GetInstance().num_device(); +// constexpr char device_number[] = "DeviceNum"; +// for(int i = 0; i < num_device; ++ i) { +// GPU_percent_gauges_.emplace_back(GPU_percent_.Add({{device_number,std::to_string(i)}})); +// } +// +//} +//void PrometheusMetrics::GpuMemoryInit() { +// int num_device = SystemInfo::GetInstance().num_device(); +// constexpr char device_number[] = "DeviceNum"; +// for(int i = 0; i < num_device; ++ i) { +// GPU_memory_usage_gauges_.emplace_back(GPU_memory_usage_.Add({{device_number,std::to_string(i)}})); +// } +//} + } } diff --git a/cpp/src/metrics/PrometheusMetrics.h b/cpp/src/metrics/PrometheusMetrics.h index c23c137fe8..fc2bef6f60 100644 --- a/cpp/src/metrics/PrometheusMetrics.h +++ b/cpp/src/metrics/PrometheusMetrics.h @@ -49,6 +49,8 @@ class PrometheusMetrics: public MetricsBase { std::shared_ptr exposer_ptr_; std::shared_ptr registry_ = std::make_shared(); bool startup_ = false; +// void GpuPercentInit(); +// void GpuMemoryInit(); public: void AddGroupSuccessTotalIncrement(double value = 1.0) override { if(startup_) add_group_success_total_.Increment(value);}; @@ -115,45 +117,7 @@ class PrometheusMetrics: public MetricsBase { void ConnectionGaugeDecrement() override ; void KeepingAliveCounterIncrement(double value = 1) override {if(startup_) keeping_alive_counter_.Increment(value);}; -// prometheus::Counter &connection_total() {return connection_total_; } -// -// prometheus::Counter &add_group_success_total() { return add_group_success_total_; } -// prometheus::Counter &add_group_fail_total() { return add_group_fail_total_; } -// -// prometheus::Counter &get_group_success_total() { return get_group_success_total_;} -// prometheus::Counter &get_group_fail_total() { return get_group_fail_total_;} -// -// prometheus::Counter &has_group_success_total() { return has_group_success_total_;} -// prometheus::Counter &has_group_fail_total() { return has_group_fail_total_;} -// -// prometheus::Counter &get_group_files_success_total() { return get_group_files_success_total_;}; -// prometheus::Counter &get_group_files_fail_total() { return get_group_files_fail_total_;} -// -// prometheus::Counter &add_vectors_success_total() { return add_vectors_success_total_; } -// prometheus::Counter &add_vectors_fail_total() { return add_vectors_fail_total_; } -// -// prometheus::Histogram &add_vectors_duration_histogram() { return add_vectors_duration_histogram_;} -// -// prometheus::Counter &search_success_total() { return search_success_total_; } -// prometheus::Counter &search_fail_total() { return search_fail_total_; } -// -// prometheus::Histogram &search_duration_histogram() { return search_duration_histogram_; } -// prometheus::Histogram &raw_files_size_histogram() { return raw_files_size_histogram_; } -// prometheus::Histogram &index_files_size_histogram() { return index_files_size_histogram_; } -// -// prometheus::Histogram &build_index_duration_seconds_histogram() { return build_index_duration_seconds_histogram_; } -// -// prometheus::Histogram &all_build_index_duration_seconds_histogram() { return all_build_index_duration_seconds_histogram_; } -// -// prometheus::Gauge &cache_usage_gauge() { return cache_usage_gauge_; } -// -// prometheus::Counter &meta_visit_total() { return meta_visit_total_; } -// -// prometheus::Histogram &meta_visit_duration_seconds_histogram() { return meta_visit_duration_seconds_histogram_; } -// -// prometheus::Gauge &mem_usage_percent_gauge() { return mem_usage_percent_gauge_; } -// -// prometheus::Gauge &mem_usage_total_gauge() { return mem_usage_total_gauge_; } + @@ -477,6 +441,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Gauge &GPU5_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "5"}}); prometheus::Gauge &GPU6_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "6"}}); prometheus::Gauge &GPU7_percent_gauge_ = GPU_percent_.Add({{"DeviceNum", "7"}}); +// std::vector GPU_percent_gauges_; @@ -494,6 +459,7 @@ class PrometheusMetrics: public MetricsBase { prometheus::Gauge &GPU5_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "5"}}); prometheus::Gauge &GPU6_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "6"}}); prometheus::Gauge &GPU7_memory_usage_gauge_ = GPU_memory_usage_.Add({{"DeviceNum", "7"}}); +// std::vector GPU_memory_usage_gauges_; prometheus::Family &query_index_type_per_second_ = prometheus::BuildGauge() .Name("query_index_throughtout_per_microsecond") diff --git a/cpp/src/metrics/SystemInfo.cpp b/cpp/src/metrics/SystemInfo.cpp index a90e206a93..210817f856 100644 --- a/cpp/src/metrics/SystemInfo.cpp +++ b/cpp/src/metrics/SystemInfo.cpp @@ -56,7 +56,7 @@ void SystemInfo::Init() { } long long -SystemInfo::parseLine(char *line) { +SystemInfo::ParseLine(char *line) { // This assumes that a digit will be found and the line ends in " Kb". int i = strlen(line); const char *p = line; @@ -80,18 +80,20 @@ unsigned long SystemInfo::GetProcessUsedMemory() { //Note: this value is in KB! FILE* file = fopen("/proc/self/status", "r"); + constexpr int64_t line_length = 128; long long result = -1; - char line[128]; + constexpr int64_t KB_SIZE = 1024; + char line[line_length]; - while (fgets(line, 128, file) != NULL){ + while (fgets(line, line_length, file) != NULL){ if (strncmp(line, "VmRSS:", 6) == 0){ - result = parseLine(line); + result = ParseLine(line); break; } } fclose(file); // return value in Byte - return (result*1024); + return (result*KB_SIZE); } @@ -128,46 +130,46 @@ SystemInfo::CPUPercent() { return percent; } -std::unordered_map> -SystemInfo::GetGPUMemPercent(){ - // return GPUID: MEM% +//std::unordered_map> +//SystemInfo::GetGPUMemPercent(){ +// // return GPUID: MEM% +// +// //write GPU info to a file +// system("nvidia-smi pmon -c 1 > GPUInfo.txt"); +// int pid = (int)getpid(); +// +// //parse line +// std::ifstream read_file; +// read_file.open("GPUInfo.txt"); +// std::string line; +// while(getline(read_file, line)){ +// std::vector words = split(line); +// // 0 1 2 3 4 5 6 7 +// //words stand for gpuindex, pid, type, sm, mem, enc, dec, command respectively +// if(std::stoi(words[1]) != pid) continue; +// int GPUindex = std::stoi(words[0]); +// double sm_percent = std::stod(words[3]); +// double mem_percent = std::stod(words[4]); +// +// } +// +//} - //write GPU info to a file - system("nvidia-smi pmon -c 1 > GPUInfo.txt"); - int pid = (int)getpid(); - - //parse line - std::ifstream read_file; - read_file.open("GPUInfo.txt"); - std::string line; - while(getline(read_file, line)){ - std::vector words = split(line); - // 0 1 2 3 4 5 6 7 - //words stand for gpuindex, pid, type, sm, mem, enc, dec, command respectively - if(std::stoi(words[1]) != pid) continue; - int GPUindex = std::stoi(words[0]); - double sm_percent = std::stod(words[3]); - double mem_percent = std::stod(words[4]); - - } - -} - -std::vector -SystemInfo::split(std::string input) { - std::vector words; - input += " "; - int word_start = 0; - for (int i = 0; i < input.size(); ++i) { - if(input[i] != ' ') continue; - if(input[i] == ' ') { - word_start = i + 1; - continue; - } - words.push_back(input.substr(word_start,i-word_start)); - } - return words; -} +//std::vector +//SystemInfo::split(std::string input) { +// std::vector words; +// input += " "; +// int word_start = 0; +// for (int i = 0; i < input.size(); ++i) { +// if(input[i] != ' ') continue; +// if(input[i] == ' ') { +// word_start = i + 1; +// continue; +// } +// words.push_back(input.substr(word_start,i-word_start)); +// } +// return words; +//} std::vector SystemInfo::GPUPercent() { diff --git a/cpp/src/metrics/SystemInfo.h b/cpp/src/metrics/SystemInfo.h index d173f4f3af..042358c3df 100644 --- a/cpp/src/metrics/SystemInfo.h +++ b/cpp/src/metrics/SystemInfo.h @@ -42,14 +42,14 @@ class SystemInfo { } void Init(); - int NumDevice() {return num_device_;}; - long long parseLine(char* line); + int num_device() const {return num_device_;}; + long long ParseLine(char* line); unsigned long GetPhysicalMemory(); unsigned long GetProcessUsedMemory(); double MemoryPercent(); double CPUPercent(); - std::unordered_map> GetGPUMemPercent(); - std::vector split(std::string input); +// std::unordered_map> GetGPUMemPercent() {}; +// std::vector split(std::string input) {}; std::vector GPUPercent(); std::vector GPUMemoryUsed(); diff --git a/cpp/src/server/MegasearchThreadPoolServer.cpp b/cpp/src/server/MegasearchThreadPoolServer.cpp index f7ce0d4f72..d227442a45 100644 --- a/cpp/src/server/MegasearchThreadPoolServer.cpp +++ b/cpp/src/server/MegasearchThreadPoolServer.cpp @@ -4,16 +4,20 @@ * Proprietary and confidential. ******************************************************************************/ #include "metrics/Metrics.h" - - #include "MegasearchThreadPoolServer.h" +namespace zilliz { +namespace vecwise { +namespace server { -void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientConnected(const std::shared_ptr &pClient) { +void +MegasearchThreadPoolServer::onClientConnected(const std::shared_ptr &pClient) { server::Metrics::GetInstance().ConnectionGaugeIncrement(); TThreadPoolServer::onClientConnected(pClient); } -void zilliz::vecwise::server::MegasearchThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) { + +void +MegasearchThreadPoolServer::onClientDisconnected(apache::thrift::server::TConnectedClient *pClient) { server::Metrics::GetInstance().ConnectionGaugeDecrement(); TThreadPoolServer::onClientDisconnected(pClient); } @@ -25,3 +29,6 @@ zilliz::vecwise::server::MegasearchThreadPoolServer::MegasearchThreadPoolServer( : TThreadPoolServer(processor, serverTransport, transportFactory, protocolFactory, threadManager) { } +} +} +} \ No newline at end of file diff --git a/cpp/src/thrift/megasearch.thrift b/cpp/src/thrift/megasearch.thrift index 0dc7230934..0f15695e65 100644 --- a/cpp/src/thrift/megasearch.thrift +++ b/cpp/src/thrift/megasearch.thrift @@ -29,7 +29,7 @@ exception Exception { } - +/** * @brief Table Schema */ struct TableSchema {