This repository has been archived by the owner on Nov 1, 2019. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
snapshot.sh
executable file
·208 lines (194 loc) · 7.62 KB
/
snapshot.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
#!/bin/bash
# load snapshot collector: samples disk I/O, cpu idle and load average, keeps a short
# history per metric next to this script and dumps top/iotop output when a metric stays
# beyond its threshold; status gauges are pushed to falcon at the end of the run
export LANG="en_US.UTF-8"
# all state files (histories, snapshots, locks, dumps) use relative paths, so run from
# the directory that holds the script; quoted so paths with spaces keep working
current_dir=$(dirname -- "$0")
cd -- "${current_dir}" || exit 1
# thresholds
# number of consecutive samples that must breach a threshold before a dump is taken
readonly COUNTER=5
# bytes read from one disk between two samples
readonly READ_BYTES_THRESHOLD=6000000000
# bytes written to one disk between two samples
readonly WRITE_BYTES_THRESHOLD=1800000000
# average milliseconds spent per I/O request between two samples
readonly IO_AWAIT_THRESHOLD=220
# value of the first field of /proc/loadavg
readonly LOAD_AVG_THRESHOLD=16
# idle share of total cpu time between two samples (0..1); lower means busier
readonly CPU_IDLE_THRESHOLD=0.2
# variables for pushing to falcon
hostname=$(hostname)
timestamp=$(date +%s)
post_data=""
# handling after terminated (HUP, INT, QUIT, TERM)
trap 'cleanup' HUP INT QUIT TERM
# signal handler: remove the lock and snapshot files, kill background jobs that are
# still running, then leave the script
function cleanup() {
  rm -f top.lock iotop.lock cpustat.* diskstats.*
  local pids
  pids=$(jobs -p)
  # only call kill when there is something to kill; with an empty job list the old
  # "jobs -p | xargs kill -9" ran kill without a pid and printed an error
  if [[ -n "${pids}" ]]; then
    # word splitting is intended here: jobs -p prints one pid per line
    # shellcheck disable=SC2086
    kill -9 ${pids} 2> /dev/null
  fi
  exit
}
# append a sample to a history file, keeping at most COUNTER entries (one per line)
# arguments: $1 - history file path, $2 - value to record (an empty value is not stored)
# globals:   COUNTER (read)
function write_history() {
  local history_file=${1}
  local value=${2}
  local latest=""
  if [[ -f "${history_file}" ]]; then
    # keep room for the new sample: only the newest COUNTER-1 entries survive
    latest=$(tail -n "$((COUNTER - 1))" "${history_file}")
  fi
  # rewrite the file in one go; the very first sample is recorded as well
  # (it used to be dropped when the file did not exist yet)
  {
    if [[ -n "${latest}" ]]; then
      printf '%s\n' "${latest}"
    fi
    if [[ -n "${value}" ]]; then
      printf '%s\n' "${value}"
    fi
  } > "${history_file}"
}
# sample /proc/diskstats and, for every sd*/vd* disk listed by fdisk, compute the bytes
# read/written and the average milliseconds per I/O request since the previous sample;
# each value goes into its per-device history file and dump_io_top is called once any of
# the three histories holds COUNTER entries above its threshold
# globals: COUNTER, READ_BYTES_THRESHOLD, WRITE_BYTES_THRESHOLD, IO_AWAIT_THRESHOLD (read)
# outputs: timestamp, device name and the three computed values on stdout
function check_io_await() {
  # rotate the previous snapshot so old/new bracket one sampling interval
  if [[ -f diskstats.new ]]; then
    mv diskstats.new diskstats.old
  fi
  cat /proc/diskstats > diskstats.new
  # first run: nothing to compare against yet
  if [[ ! -f diskstats.old || ! -f diskstats.new ]]; then
    return 0
  fi
  date "+%x %T"
  # one awk pass per snapshot; prints, for the row whose name column equals dev:
  # read requests, read sectors, ms reading, write requests, write sectors, ms writing
  local -r stat_fields='$3 == dev {print $4, $6, $7, $8, $10, $11}'
  fdisk -l \
    | grep -E 'Disk /dev/sd|Disk /dev/vd' \
    | awk '{print substr($2,6,index($2,":")-6)}' \
    | while read -r dev; do
      echo "${dev}: "
      local read_request_old read_sectors_old msec_read_old
      local write_request_old write_sectors_old msec_write_old
      local read_request_new read_sectors_new msec_read_new
      local write_request_new write_sectors_new msec_write_new
      # the device name is handed to awk as a variable instead of being pasted into
      # the program text
      read -r read_request_old read_sectors_old msec_read_old \
        write_request_old write_sectors_old msec_write_old \
        < <(awk -v dev="${dev}" "${stat_fields}" diskstats.old)
      read -r read_request_new read_sectors_new msec_read_new \
        write_request_new write_sectors_new msec_write_new \
        < <(awk -v dev="${dev}" "${stat_fields}" diskstats.new)
      local n_io=$((read_request_new - read_request_old + write_request_new - write_request_old))
      local use=$((msec_read_new - msec_read_old + msec_write_new - msec_write_old))
      # sector counters are converted to bytes with a fixed factor of 512
      local read_bytes=$(((read_sectors_new - read_sectors_old) * 512))
      local write_bytes=$(((write_sectors_new - write_sectors_old) * 512))
      local io_await=0
      # no request completed in the interval: keep 0 instead of dividing by zero
      if [[ "${n_io}" -ne 0 ]]; then
        io_await=$(echo "scale=2;${use}/${n_io}" | bc)
      fi
      write_history "${dev}_read_bytes_history" "${read_bytes}"
      write_history "${dev}_write_bytes_history" "${write_bytes}"
      write_history "${dev}_io_await_history" "${io_await}"
      echo "read_bytes: ${read_bytes}"
      echo "write_bytes: ${write_bytes}"
      echo "io.await: ${io_await}"
      # count how many recorded samples exceed each threshold
      local over_read over_write over_await
      over_read=$(awk -v limit="${READ_BYTES_THRESHOLD}" \
        '$0 > limit {n++} END {print n + 0}' "${dev}_read_bytes_history")
      over_write=$(awk -v limit="${WRITE_BYTES_THRESHOLD}" \
        '$0 > limit {n++} END {print n + 0}' "${dev}_write_bytes_history")
      over_await=$(awk -v limit="${IO_AWAIT_THRESHOLD}" \
        '$0 > limit {n++} END {print n + 0}' "${dev}_io_await_history")
      if [[ "${over_read}" -eq "${COUNTER}" ||
        "${over_write}" -eq "${COUNTER}" ||
        "${over_await}" -eq "${COUNTER}" ]]; then
        dump_io_top
      fi
    done
}
# sample the aggregate cpu line of /proc/stat, compute the idle share of cpu time since
# the previous sample, record it and call dump_top once the history holds COUNTER
# entries below CPU_IDLE_THRESHOLD
# globals: COUNTER, CPU_IDLE_THRESHOLD (read)
# outputs: "cpu idle: <value>" on stdout; a notice on stderr when a sample is skipped
function check_cpu_idle() {
  # rotate the previous snapshot so old/new bracket one sampling interval
  if [[ -f cpustat.new ]]; then
    mv cpustat.new cpustat.old
  fi
  cat /proc/stat > cpustat.new
  # first run: nothing to compare against yet
  if [[ ! -f cpustat.old || ! -f cpustat.new ]]; then
    return 0
  fi
  # first line only: sum of columns 2..11 (total) and column 5 (idle);
  # %.0f keeps large counters in plain integer notation
  local -r cpu_totals='NR == 1 {printf "%.0f %.0f\n", $2+$3+$4+$5+$6+$7+$8+$9+$10+$11, $5; exit}'
  local total_old idle_old total_new idle_new
  read -r total_old idle_old < <(awk "${cpu_totals}" cpustat.old)
  read -r total_new idle_new < <(awk "${cpu_totals}" cpustat.new)
  local total_delta=$((total_new - total_old))
  local idle_delta=$((idle_new - idle_old))
  # a zero delta would make bc divide by zero and a negative one (old snapshot holds
  # larger counters than the new one) yields a meaningless ratio: skip the sample
  if ((total_delta <= 0)); then
    echo "cpu idle: no cpu time elapsed between samples, skipping" >&2
    return 0
  fi
  local cpu_idle
  cpu_idle=$(echo "scale=2;${idle_delta}/${total_delta}" | bc)
  write_history cpu_idle_history "${cpu_idle}"
  echo "cpu idle: ${cpu_idle}"
  local below
  below=$(awk -v limit="${CPU_IDLE_THRESHOLD}" \
    '$0 < limit {n++} END {print n + 0}' cpu_idle_history)
  if [[ "${below}" -eq "${COUNTER}" ]]; then
    dump_top
  fi
}
# record the first field of /proc/loadavg and call dump_top once the history holds
# COUNTER entries at or above LOAD_AVG_THRESHOLD
# globals: COUNTER, LOAD_AVG_THRESHOLD (read)
# outputs: "load_avg: <value>" on stdout
function check_load_avg() {
  local load_avg
  # only the first whitespace-separated field is needed
  read -r load_avg _ < /proc/loadavg
  write_history load_avg_history "${load_avg}"
  echo "load_avg: ${load_avg}"
  local breaches
  breaches=$(awk -v limit="${LOAD_AVG_THRESHOLD}" \
    '$0 >= limit {n++} END {print n + 0}' load_avg_history)
  if [[ "${breaches}" -eq "${COUNTER}" ]]; then
    dump_top
  fi
}
# append five batch iterations of top to top.<YYYYMMDD>, serialised through top.lock
# returns: 0 after a dump, 1 when the lock could not be obtained after 5 one-second waits
function dump_top() {
  echo "dump top"
  local waited=0
  # noclobber makes the redirection fail when the file already exists, so testing for
  # the lock and taking it is a single step; the cpu and load checks run in parallel and
  # could otherwise both pass a separate "-f" test before either created the file
  until (
    set -o noclobber
    : > top.lock
  ) 2> /dev/null; do
    if ((waited >= 5)); then
      echo "unable to get top.lock"
      return 1
    fi
    waited=$((waited + 1))
    sleep 1
  done
  top -b -n 5 >> "top.$(date '+%Y%m%d')"
  rm -f top.lock
}
# append five batch iterations of iotop to iotop.<YYYYMMDD>, serialised through iotop.lock
# returns: 0 after a dump, 1 when the lock could not be obtained after 5 one-second waits
function dump_io_top() {
  echo "dump io top"
  local waited=0
  # noclobber makes the redirection fail when the file already exists, so testing for
  # the lock and taking it is a single step instead of a separate "-f" test and touch
  until (
    set -o noclobber
    : > iotop.lock
  ) 2> /dev/null; do
    if ((waited >= 5)); then
      echo "unable to get iotop.lock"
      return 1
    fi
    waited=$((waited + 1))
    sleep 1
  done
  iotop -b -t -k -o -P -n 5 >> "iotop.$(date '+%Y%m%d')"
  rm -f iotop.lock
}
# make sure a command is available, installing it through yum when it is missing, and
# queue its "<tool>_installed" gauge (0 = available, 1 = still missing after the install)
# arguments: $1 - command / package name
# globals:   hostname, timestamp (read), post_data (appended to)
# returns:   0 when the command is available, 1 otherwise
function _ensure_tool() {
  local tool=${1}
  local value=0
  if ! command -v "${tool}" &> /dev/null; then
    yum install -y "${tool}"
    # judge by whether the command can be found now, not only by yum's exit status
    command -v "${tool}" &> /dev/null || value=1
  fi
  local metric_data
  printf -v metric_data \
    '{"endpoint": "%s", "metric": "load.snapshot", "timestamp": %s, "step": 60, "value": %s, "counterType": "GAUGE", "tags": "name=%s_installed"},' \
    "${hostname}" "${timestamp}" "${value}" "${tool}"
  printf '%s\n' "${metric_data}"
  post_data="${post_data} ${metric_data}"
  return "${value}"
}

# check the tools the collectors rely on (bc, iotop), report their availability and,
# when iotop is available, run the three checks in parallel and wait for them
# a tool that had to be installed in this very run is reported and used right away
# (previously no gauge was queued and the checks were skipped until the next run)
# globals: post_data (appended to)
function schedule() {
  _ensure_tool bc
  if _ensure_tool iotop; then
    check_io_await &
    check_cpu_idle &
    check_load_avg &
    wait
  fi
}
# POST a JSON payload to the falcon agent listening on this host
# arguments: $1 - payload; nothing is sent when it is empty
# outputs:   a success/failure line on stdout
# returns:   0 when nothing had to be sent or the agent answered 200, 1 otherwise
function push_to_falcon() {
  if [[ -z "${1}" ]]; then
    return 0
  fi
  local ret_code
  # the agent is addressed directly on the loopback interface; the previous URL was
  # wrapped in an external proxy host, which cannot reach 127.0.0.1 of this machine
  # and sent the payload off the host
  # -w prints only the http status, the response body is discarded
  ret_code=$(curl -s -m 180 -w '%{http_code}' -X POST -d "${1}" -o /dev/null \
    http://127.0.0.1:1988/v1/push)
  if [[ "${ret_code}" == "200" ]]; then
    echo "push to falcon successfully"
    return 0
  else
    echo "push to falcon failed"
    return 1
  fi
}
# delete top.* and iotop.* files below the current directory whose status change time
# is more than 7 days old
function purge() {
  local pattern
  for pattern in 'top.*' 'iotop.*'; do
    find . -name "${pattern}" -ctime +7 -exec rm -f {} \;
  done
}
# run the checks, purge old dumps, then wrap the queued metrics into a JSON array,
# print it and push it to falcon
# globals: post_data (rewritten)
# returns: the status of push_to_falcon
function main() {
  schedule
  purge
  # every queued metric ends with a comma; drop the last one before closing the array
  local metrics=${post_data%,}
  post_data="[${metrics}]"
  echo "${post_data}"
  push_to_falcon "${post_data}"
}
# entry point: main is the last command of the script, so when the script runs to
# completion its exit status is main's return status
main