监控机器列表文件:server.list(放在监控目录 /home/admin/monitor/ 下,内容为要监控的主机名或 IP,以换行或空白分隔;脚本会访问 http://主机/app.htm)
建立监控脚本 webstatus.sh,内容如下:
#!/bin/sh
# webstatus.sh - HTTP health check for the hosts listed in server.list.
#
# For every host in the list the script requests http://<host>/app.htm with
# curl (10 s connect timeout, 10 s total time). A 200 answer is logged as OK;
# any other result is logged as an error and an alarm text is piped to nc.
# The log and the host list both live in $monitor_dir.
#
# Exit status: 0 after a full pass over the list, 1 when the working
# directory, the log file or the host list is unusable.

monitor_dir=/home/admin/monitor/ # working directory (log + host list)
web_stat_log=web.status          # log file, relative to $monitor_dir
server_list_file=server.list     # host list, relative to $monitor_dir
sms_server=smsserver             # nc target host; NOTE(review): looks like a placeholder - confirm
sms_port=port                    # nc target port; NOTE(review): looks like a placeholder - confirm

# Append one timestamped line to the log file.
log_line() {
  printf '%s %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*" >>"$web_stat_log"
}

# Report a fatal setup problem on stderr and stop with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# -p creates missing parents and is a no-op when the directory already exists.
mkdir -p "$monitor_dir" || die "cannot create $monitor_dir"
# Every path below is relative, so never continue in the wrong directory.
cd "$monitor_dir" || die "cannot cd to $monitor_dir"

if [ ! -f "$web_stat_log" ]; then
  touch "$web_stat_log" || die "cannot create $monitor_dir$web_stat_log"
fi

if [ ! -f "$server_list_file" ]; then
  log_line "ERROR:$server_list_file NOT exists!"
  exit 1
fi

# awk turns the list into one host per line: '#' comments and carriage
# returns (CRLF files) are dropped, and whitespace-separated entries on the
# same line are still accepted. Reading the result with `read` instead of
# word-splitting a command substitution keeps the shell from glob-expanding
# entries.
awk '{ sub(/#.*/, ""); gsub(/\r/, ""); for (i = 1; i <= NF; i++) print $i }' \
  "$server_list_file" |
while IFS= read -r website; do
  url="http://$website/app.htm"
  # stdin is redirected so curl can never consume the host list on the pipe.
  server_status_code=$(curl -o /dev/null -s -m 10 --connect-timeout 10 \
    -w '%{http_code}' "$url" </dev/null)
  if [ "$server_status_code" = "200" ]; then
    log_line "visit $website status code 200 OK"
  else
    log_line "visit $website error!!! server can't connect at 10s or stop response at 10 s, send alerm sms ..."
    # Sent in the background so a slow gateway does not delay later checks.
    printf '%s\n' "!app alarm @136xxxxxxxx server:$website can't connect at 10s or stop response at 10s ..." |
      nc "$sms_server" "$sms_port" &
  fi
done
exit 0
最后用 crontab -e 添加定时任务,定期执行 webstatus.sh,例如每 5 分钟执行一次:*/5 * * * * /bin/sh /path/to/webstatus.sh