-
Notifications
You must be signed in to change notification settings - Fork 175
/
getting-started.yml
30 lines (30 loc) · 1 KB
/
getting-started.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
version: "prometheus/v1"
service: "myservice"
labels:
owner: "myteam"
repo: "myorg/myservice"
tier: "2"
slos:
# We allow failing (5xx and 429) 1 request every 1000 requests (99.9%).
- name: "requests-availability"
objective: 99.9
description: "Common SLO based on availability for HTTP request responses."
sli:
events:
error_query: sum(rate(http_request_duration_seconds_count{job="myservice",code=~"(5..|429)"}[{{.window}}]))
total_query: sum(rate(http_request_duration_seconds_count{job="myservice"}[{{.window}}]))
alerting:
name: MyServiceHighErrorRate
labels:
category: "availability"
annotations:
# Overwrite default Sloth SLO alert summmary on ticket and page alerts.
summary: "High error rate on 'myservice' requests responses"
page_alert:
labels:
severity: pageteam
routing_key: myteam
ticket_alert:
labels:
severity: "slack"
slack_channel: "#alerts-myteam"