forked from slok/sloth
-
Notifications
You must be signed in to change notification settings - Fork 1
/
k8s-multifile.yml
73 lines (73 loc) · 2.26 KB
/
k8s-multifile.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
---
# This example defines the same kind of SLO as getting-started.yml, but uses the Sloth Kubernetes CRD
# and several documents in a single file (multifile).
# Sloth generates the Prometheus rules as a Kubernetes prometheus-operator PrometheusRule CRD.
#
# `sloth generate -i ./examples/k8s-multifile.yml`
#
# PrometheusServiceLevel for "myservice": a single availability SLO with its
# page and ticket alert labels.
apiVersion: sloth.slok.dev/v1
kind: PrometheusServiceLevel
metadata:
  name: sloth-slo-my-service
  namespace: monitoring
spec:
  service: "myservice"
  # Labels declared at spec level, alongside the service name.
  labels:
    owner: "myteam"
    repo: "myorg/myservice"
    # Quoted so the value stays a string instead of being read as an integer.
    tier: "2"
  slos:
    - name: "requests-availability"
      objective: 99.9
      description: "Common SLO based on availability for HTTP request responses."
      sli:
        events:
          # Queries are single-quoted so the embedded double quotes, braces,
          # brackets and the {{.window}} placeholder are kept literally.
          # Error events: requests whose `code` label matches 5.. or 429.
          errorQuery: 'sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}]))'
          # Total events: every request of the same job.
          totalQuery: 'sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}]))'
      alerting:
        name: MyServiceHighErrorRate
        # Labels and annotations declared at alerting level, next to the
        # per-alert label sets below.
        labels:
          category: "availability"
        annotations:
          summary: "High error rate on 'myservice' requests responses"
        # Labels specific to the page alert.
        pageAlert:
          labels:
            severity: "pageteam"
            routing_key: "myteam"
        # Labels specific to the ticket alert.
        ticketAlert:
          labels:
            severity: "slack"
            # Must stay quoted: a leading `#` would otherwise start a comment.
            slack_channel: "#alerts-myteam"
---
# PrometheusServiceLevel for "myservice2": a single availability SLO with its
# page and ticket alert labels.
apiVersion: sloth.slok.dev/v1
kind: PrometheusServiceLevel
metadata:
  name: sloth-slo-my-service2
  namespace: monitoring
spec:
  service: "myservice2"
  # Labels declared at spec level, alongside the service name.
  labels:
    owner: "myteam2"
    repo: "myorg/myservice2"
    # Quoted so the value stays a string instead of being read as an integer.
    tier: "1"
  slos:
    - name: "requests-availability"
      objective: 99.99
      description: "Common SLO based on availability for HTTP request responses."
      sli:
        events:
          # Queries are single-quoted so the embedded double quotes, braces,
          # brackets and the {{.window}} placeholder are kept literally.
          # The job selector matches this document's service ("myservice2").
          # Error events: requests whose `code` label matches 5.. or 429.
          errorQuery: 'sum(rate(http_request_duration_seconds_count{job="myservice2",code=~"(5..|429)"}[{{.window}}]))'
          # Total events: every request of the same job.
          totalQuery: 'sum(rate(http_request_duration_seconds_count{job="myservice2"}[{{.window}}]))'
      alerting:
        name: MyServiceHighErrorRate
        # Labels and annotations declared at alerting level, next to the
        # per-alert label sets below.
        labels:
          category: "availability"
        annotations:
          summary: "High error rate on 'myservice2' requests responses"
        # NOTE(review): routing below targets "myteam" while the owner label is
        # "myteam2" - confirm whether this is intentional.
        # Labels specific to the page alert.
        pageAlert:
          labels:
            severity: "pageteam"
            routing_key: "myteam"
        # Labels specific to the ticket alert.
        ticketAlert:
          labels:
            severity: "slack"
            # Must stay quoted: a leading `#` would otherwise start a comment.
            slack_channel: "#alerts-myteam"