Skip to content

Instantly share code, notes, and snippets.

@craigwillis85
Last active February 22, 2017 14:36
Show Gist options
  • Select an option

  • Save craigwillis85/310e50fbea652bb2bdf0def31fb17701 to your computer and use it in GitHub Desktop.

Select an option

Save craigwillis85/310e50fbea652bb2bdf0def31fb17701 to your computer and use it in GitHub Desktop.
---
# Alertmanager configuration. The `alertmanager` Deployment mounts this
# ConfigMap at /etc/alertmanager, so `config.yml` ends up at
# /etc/alertmanager/config.yml.
apiVersion: v1
kind: ConfigMap
metadata:
  name: alertmanager
data:
  config.yml: |-
    # Intentionally empty: no global settings are overridden.
    global:
    # The directory from which notification templates are read.
    templates:
      - '/etc/alertmanager/template/*.tmpl'
    # The root route on which each incoming alert enters.
    route:
      group_by: ['alertname', 'cluster', 'service', 'kubernetes_pod_name', 'team']
      group_wait: 10s
      group_interval: 5m
      repeat_interval: 3h
      # Fallback receiver for alerts that match none of the child routes.
      receiver: 'platformSlackInfo'
      routes:
        # Keep these platform routes last: team-specific routes go above
        # them, so an alert without a `team` label still reaches the
        # platform channels.
        - receiver: 'platformSlackUrgent'
          match:
            severity: critical
          continue: true
        - receiver: 'platformSlackInfo'
          match:
            severity: info
          continue: true
    receivers:
      # `TOKEN` in the webhook URLs is a placeholder for the real Slack
      # incoming-webhook path.
      - name: 'platformSlackUrgent'
        slack_configs:
          - api_url: "https://hooks.slack.com/services/TOKEN"
            channel: "#testing"
            text: '{{ .CommonAnnotations.description }}'
            username: 'Prometheus'
      - name: 'platformSlackInfo'
        slack_configs:
          - api_url: "https://hooks.slack.com/services/TOKEN"
            channel: "#testing"
            text: '{{ .CommonAnnotations.description }}'
            username: 'Prometheus'
---
# Runs a single Alertmanager replica, configured from the `alertmanager`
# ConfigMap mounted at /etc/alertmanager.
#
# apps/v1 replaces extensions/v1beta1, which is no longer served for
# Deployments as of Kubernetes 1.16.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: alertmanager
spec:
  replicas: 1
  selector:
    matchLabels:
      app: alertmanager
  template:
    metadata:
      name: alertmanager
      labels:
        app: alertmanager
    spec:
      containers:
        - name: alertmanager
          # Pinned on purpose: the single-dash flags below are only
          # accepted by Alertmanager releases before 0.13, so `latest`
          # fails to start with them.
          image: prom/alertmanager:v0.5.1
          args:
            - '-config.file=/etc/alertmanager/config.yml'
            - '-storage.path=/alertmanager'
          ports:
            - name: web
              containerPort: 9093
          volumeMounts:
            - name: config-volume
              mountPath: /etc/alertmanager
      volumes:
        - name: config-volume
          configMap:
            name: alertmanager
---
# Runs node-exporter on every node, exposing host metrics on port 9100 of
# the node itself (host network and host PID namespace).
#
# apps/v1 replaces extensions/v1beta1, which is no longer served for
# DaemonSets as of Kubernetes 1.16. apps/v1 requires an explicit selector.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
spec:
  selector:
    matchLabels:
      app: node-exporter
  template:
    metadata:
      name: node-exporter
      labels:
        app: node-exporter
    spec:
      containers:
        - name: node-exporter
          # Pinned on purpose: the alert rules in the `prometheus`
          # ConfigMap query `node_cpu`, a metric name that node-exporter
          # 0.16+ no longer exports.
          image: prom/node-exporter:v0.13.0
          ports:
            - name: scrape
              containerPort: 9100
              # Must equal containerPort because hostNetwork is enabled.
              hostPort: 9100
      hostNetwork: true
      hostPID: true
---
# Prometheus 1.x configuration and alerting rules. The `prometheus`
# Deployment mounts this ConfigMap at /etc/prometheus, so both files land
# side by side and the relative `rule_files` entry resolves to
# /etc/prometheus/alert.rules.
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus
data:
  prometheus.yml: |
    global:
      scrape_interval: 10s
      scrape_timeout: 10s
      evaluation_interval: 10s
    scrape_configs:
      # Kubernetes API servers, found via the default/kubernetes endpoints.
      - job_name: 'kubernetes-apiservers'
        kubernetes_sd_configs:
          - role: endpoints
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        relabel_configs:
          - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
            action: keep
            regex: default;kubernetes;https
          # `host` = scrape address with the port stripped.
          - source_labels: [__address__]
            target_label: host
            regex: '(.*):.*'
      # One target per cluster node; node labels are copied onto the series.
      - job_name: 'kubernetes-nodes'
        scheme: https
        tls_config:
          ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
        bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
        kubernetes_sd_configs:
          - role: node
        relabel_configs:
          - action: labelmap
            regex: __meta_kubernetes_node_label_(.+)
      # Endpoints of services annotated with prometheus.io/scrape=true.
      # The prometheus.io/scheme, /path and /port annotations override the
      # scrape scheme, metrics path and port respectively.
      - job_name: 'kubernetes-service-endpoints'
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
            action: replace
            target_label: __scheme__
            regex: (https?)
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
            action: replace
            target_label: __address__
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            action: replace
            target_label: kubernetes_name
          - source_labels: [__address__]
            target_label: host
            regex: '(.*):.*'
      # Services annotated with prometheus.io/probe=true. The service
      # address becomes the `target` URL parameter and the scrape itself is
      # sent to the `blackbox` address with module http_2xx.
      - job_name: 'kubernetes-services'
        metrics_path: /probe
        params:
          module: [http_2xx]
        kubernetes_sd_configs:
          - role: service
        relabel_configs:
          - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_probe]
            action: keep
            regex: true
          - source_labels: [__address__]
            target_label: __param_target
          - target_label: __address__
            replacement: blackbox
          - source_labels: [__param_target]
            target_label: instance
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_service_name]
            target_label: kubernetes_name
          - source_labels: [__address__]
            target_label: host
            regex: '(.*):.*'
      # Pods annotated with prometheus.io/scrape=true. The prometheus.io/path
      # and /port annotations override the metrics path and port.
      - job_name: 'kubernetes-pods'
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape]
            action: keep
            regex: true
          - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path]
            action: replace
            target_label: __metrics_path__
            regex: (.+)
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - source_labels: [__meta_kubernetes_namespace]
            action: replace
            target_label: kubernetes_namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: kubernetes_pod_name
          - source_labels: [__address__]
            target_label: host
            regex: '(.*):.*'
    rule_files:
      - alert.rules
  alert.rules: |
    ALERT ServiceDown
      IF up == 0
      FOR 5m
      LABELS { severity="critical" }
      ANNOTATIONS {
        summary = "Instance {{ $labels.instance }} is down",
        description = "{{ $labels.instance }} of job {{ $labels.job }} has been down for more than 5 minutes.",
      }
    # The expression aggregates by `host`, so `host` is the only label left
    # on the alert; the annotations must use it rather than `instance`.
    ALERT InstanceHighCpu
      IF (100 - avg by(host)(irate(node_cpu{mode='idle'}[5m])) * 100) > 10
      LABELS { severity="info" }
      ANNOTATIONS {
        summary = "Instance {{ $labels.host }} cpu highest",
        description = "{{ $labels.host }} has high cpu activity",
      }
---
# Runs a single Prometheus replica, configured from the `prometheus`
# ConfigMap mounted at /etc/prometheus.
#
# apps/v1 replaces extensions/v1beta1, which is no longer served for
# Deployments as of Kubernetes 1.16.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus
spec:
  replicas: 1
  selector:
    matchLabels:
      app: prometheus
  template:
    metadata:
      name: prometheus
      labels:
        app: prometheus
    spec:
      containers:
        - name: prometheus
          # Pinned on purpose: the single-dash flags below and the
          # `ALERT ... IF ...` rule syntax in alert.rules are Prometheus 1.x
          # only, so `latest` (2.x or newer) fails to start with them.
          image: prom/prometheus:v1.5.2
          args:
            - '-storage.local.retention=24h'
            - '-storage.local.memory-chunks=500000'
            - '-config.file=/etc/prometheus/prometheus.yml'
            # NOTE(review): hard-coded external address and port for
            # Alertmanager; confirm it is still reachable from the cluster.
            - '-alertmanager.url=http://45.76.138.196:31045'
          ports:
            - name: web
              containerPort: 9090
          volumeMounts:
            - name: config-volume
              mountPath: /etc/prometheus
      volumes:
        - name: config-volume
          configMap:
            name: prometheus
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment