MS-570 - Add prometheus docker-compose file

Former-commit-id: a81bcfd940892284d9f0c26049e4c42c0cf80f34
This commit is contained in:
Yu Kun 2019-09-18 20:03:19 +08:00
parent 8382fa623c
commit 1a3231e6f6
4 changed files with 129 additions and 0 deletions

19
docker/alertmanager.yml Normal file
View File

@ -0,0 +1,19 @@
# Alertmanager configuration: every alert is routed to one webhook receiver.
global:
  resolve_timeout: 5m

# Single top-level route; there are no child routes, so all alerts land here.
route:
  # Alerts with the same alertname are batched into one notification group.
  group_by:
    - alertname
  group_wait: 10s
  group_interval: 10s
  repeat_interval: 1h
  receiver: web.hook

receivers:
  - name: web.hook
    webhook_configs:
      # NOTE(review): 127.0.0.1 is the loopback of whatever host or container
      # runs Alertmanager - confirm a webhook listener is reachable there.
      - url: "http://127.0.0.1:5001/"

# While a 'critical' alert is firing, 'warning' alerts that carry the same
# alertname, dev and instance label values are suppressed.
inhibit_rules:
  - source_match:
      severity: critical
    target_match:
      severity: warning
    equal:
      - alertname
      - dev
      - instance

View File

@ -0,0 +1,56 @@
# Monitoring stack for Milvus: Prometheus, Alertmanager, Grafana and the
# Milvus engine, all attached to the same user-defined bridge network so the
# containers can reach each other by hostname.
version: '2.3'

networks:
  monitor:
    driver: bridge

services:
  prometheus:
    image: prom/prometheus:v2.11.1
    container_name: prometheus
    hostname: prometheus
    restart: always
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      # The in-container name must match the rule_files entry in
      # prometheus.yml ("serverdown.yml", resolved next to the config file);
      # otherwise the alerting rules are never loaded.
      - ./server_down.yml:/etc/prometheus/serverdown.yml
    ports:
      - "9090:9090"
    networks:
      - monitor

  alertmanager:
    # NOTE(review): tag is not pinned, so this tracks "latest" - consider
    # pinning a version the way the prometheus image is pinned.
    image: prom/alertmanager
    container_name: alertmanager
    hostname: alertmanager
    restart: always
    volumes:
      - ./alertmanager.yml:/etc/alertmanager/alertmanager.yml
    ports:
      - "9093:9093"
    networks:
      - monitor

  grafana:
    # NOTE(review): tag is not pinned, so this tracks "latest".
    image: grafana/grafana
    container_name: grafana
    hostname: grafana
    restart: always
    ports:
      - "3000:3000"
    networks:
      - monitor

  milvus:
    # Needs the NVIDIA container runtime on the host; the "runtime" key is
    # why this file uses compose format 2.3.
    runtime: nvidia
    image: registry.zilliz.com/milvus/engine:branch-0.4.0-release
    container_name: milvus
    hostname: milvus
    restart: always
    volumes:
      - ../cpp/conf/server_config.yaml:/opt/milvus/cpp/conf/server_config.yaml
      - ../cpp/conf/log_config.conf:/opt/milvus/cpp/conf/log_config.conf
    ports:
      # 8080 is the port the milvus_server scrape jobs in prometheus.yml use.
      - "8080:8080"
      - "19530:19530"
    networks:
      - monitor

46
docker/prometheus.yml Normal file
View File

@ -0,0 +1,46 @@
# Prometheus configuration for the docker-compose monitoring stack.
# Global defaults; individual scrape jobs may override them.
global:
  scrape_interval: 15s  # Scrape targets every 15 seconds. Default is every 1 minute.
  evaluation_interval: 15s  # Evaluate rules every 15 seconds. The default is every 1 minute.
  # scrape_timeout is set to the global default (10s).

# Alertmanager configuration
alerting:
  alertmanagers:
    - static_configs:
        # Use the compose hostname: inside the Prometheus container
        # "localhost" is the Prometheus container itself, not Alertmanager.
        - targets: ['alertmanager:9093']

# Load rules once and periodically evaluate them according to the global
# 'evaluation_interval'. Relative paths are resolved against the directory
# that contains this config file.
rule_files:
  - "serverdown.yml"  # add alerting rules

# Scrape jobs: Prometheus itself, the Milvus server, and a pushgateway.
scrape_configs:
  # The job name is added as a label `job=<job_name>` to any timeseries
  # scraped from this config.
  - job_name: 'prometheus'
    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
      # Self-scrape, so localhost is correct here.
      - targets: ['localhost:9090']

  # scrape metrics of server
  - job_name: 'milvus_server'
    scrape_interval: 1s
    static_configs:
      # Compose hostname of the milvus service on the shared network.
      - targets: ['milvus:8080']

  # scrape metrics of server
  # NOTE(review): same target as 'milvus_server'; presumably a placeholder
  # for a second Milvus instance - confirm or remove to avoid double scraping.
  - job_name: 'milvus_server_1'
    scrape_interval: 1s
    static_configs:
      - targets: ['milvus:8080']

  # under development
  # NOTE(review): docker-compose.yml defines no pushgateway service, so this
  # target will report as down until one exists at this address.
  - job_name: 'pushgateway'
    static_configs:
      - targets: ['localhost:9091']

8
docker/server_down.yml Normal file
View File

@ -0,0 +1,8 @@
# Alerting rules for the Milvus server.
groups:
  - name: milvus
    rules:
      # Fires when the Prometheus scrape of the milvus_server job fails.
      - alert: MilvusServerDown
        # `up` is 1 for a healthy target and 0 for a failed scrape. Without
        # the `== 0` comparison the expression returns a series in both
        # cases, so the alert would fire permanently.
        expr: 'up{job="milvus_server"} == 0'
        # NOTE(review): 1s is shorter than the 15s evaluation_interval set in
        # prometheus.yml, so the effective delay is likely one evaluation
        # cycle - confirm this is intended.
        for: 1s
        labels:
          # Must be spelled "severity": that is the label key the
          # Alertmanager inhibit rules match on.
          severity: page