Loadcheck script

=================================================================
#!/bin/bash
#
# LoadCheck
# Raj
#——————————————————————–
#
#-----General-Configs--------------------------------------------------

# Name of the general load log for this run: month-day-hour:minute.log.
# NOTE: values must use plain ASCII quotes. Typographic quotes are not
# quoting characters to the shell and would become part of the value.
logn=$(date +"%m-%d-%H:%M.log")

# Path to the directory that will hold all logs
logp="/loadscripts/logs"

# Path to the file that is created for the email report
loge="/loadscripts/lastlog"

# Script path - not needed - reserved for later use
scriptp=

#-----Hard-Drive-------------------------------------------------------

# Do we want to monitor hard drive usage? 1 for yes and 0 to disable.
# The value is compared as a string, so it must be exactly 1 to enable.
hdstatus="1"

# At what usage percentage should we send alerts?
hdalert="95"

#-----Process-Logging--------------------------------------------------

# Log processes? (yes to enable)
pmon=yes

# Log MySQL? (yes to enable)
mmon=yes

# Process log name for this run: month-day-hour:minute-ps.log
plogn=$(date +"%m-%d-%H:%M-ps.log")

# MySQL log name for this run: month-day-hour:minute-mysql.log
mlogn=$(date +"%m-%d-%H:%M-mysql.log")

# Process log path (a sub-directory of the main log directory)
plogp=$logp/pslogs

# MySQL log path (a sub-directory of the main log directory)
mlogp=$logp/mlogs

# How many days should we keep the Process logs? A value of 0 will disable log removal
plogd="4"

#-----Load-Monitoring--------------------------------------------------

# Load level above which a Warning email is sent.
# This can be anything (1, 1.5, 10.2 ..etc)
NOTIFY="4"

# Load level above which an Urgent email is sent
UNOTIFY="10"

# How many days should we keep the logs? A value of 0 will disable log removal
logd="2"

# Which load average to monitor. Accepted values are 5, 10 or 15.
# NOTE(review): uptime(1) documents its three figures as the 1-, 5- and
# 15-minute averages, so these labels may not match what is measured - confirm.
lavg="5"

# Warning Email notification
EMAIL="bhagyaraj.mb@gmail.com"

# Urgent email to support/management
UEMAIL="bhagyaraj.mb@gmail.com"

# Body text for the Urgent email.
# The load figure is read here with a command substitution: $F10M is only
# assigned much later in the script, so referencing it at this point would
# expand to an empty string.
UTEXT="This is an automated response from loadscript – server loads on $(hostname) have an average 10 minute load of $(uptime | awk -F 'load average:' '{ print $2 }' | cut -d, -f2 | tr -d ' '). Please investigate"

# Subject for Warning email
SUBJECT="Warning Alert $(hostname) load average"

# Subject for Urgent email
USUBJECT="Overload Alert $(hostname) load average"

# How many logs should we email in our warning email
emailc="3"

#----Do-Not-Edit-Below-Unless-You-Know-What-You-Are-Doing--------------

#Getting uptime

#######################################
# Print the "time since boot" part of one line of uptime(1) output.
# Arguments: $1 - raw uptime line
# Outputs:   the text between "up" and the ", N user(s)," field, e.g.
#            "5 days, 3:02" or "10 min". A value that mentions neither
#            minutes nor days is a bare clock value and gets the suffix
#            " (hh:mm)", e.g. "3:02 (hh:mm)".
#######################################
format_uptime() {
  local raw=$1 span
  # Cut on the surrounding markers instead of fixed field numbers, so the
  # result does not depend on how many comma-separated parts the uptime has.
  # The last expression squeezes column padding to single spaces.
  span=$(printf '%s\n' "$raw" |
    sed -E -e 's/^.*up +//' -e 's/, *[0-9]+ users?,.*$//' -e 's/ +/ /g')
  if printf '%s\n' "$span" | grep -qE 'min|day'; then
    printf '%s\n' "$span"
  else
    printf '%s (hh:mm)\n' "$span"
  fi
}

uptime="$(uptime)"
ut=$(format_uptime "$uptime")

#avg load: everything uptime prints after "average:"
avgload="$(uptime | awk -F'average:' '{ print $2 }')"

#Current load: the second figure of that list, commas removed
curload="$(printf '%s\n' "$avgload" | tr -d ',' | awk '{ print $2 }')"

# Memory figures in MB. free is run once so that all four values come from
# the same snapshot. Only -m is passed: the old -o flag is rejected by newer
# procps-ng releases of free, which would leave every value below empty.
meminfo="$(free -m)"

#Used Ram
rusedram="$(printf '%s\n' "$meminfo" | awk '/Mem:/ { print $3 " MB" }')"

#free ram
rfreeram="$(printf '%s\n' "$meminfo" | awk '/Mem:/ { print $4 " MB" }')"

#total ram
rtotalram="$(printf '%s\n' "$meminfo" | awk '/Mem:/ { print $2 " MB" }')"

#swap usage
swap="$(printf '%s\n' "$meminfo" | awk '/Swap:/ { print $3 " MB" }')"

#Getting server time: fields 2-4 of the default date output
date="$(date | awk '{ print $2, $3, $4 }')"

#get partition usages: the four fullest partitions, as "<mount>\t: <use%> used"
space=$(df -h | sort -rnk 5 | head -n 4 | awk '{ print "" $6 "\t: " $5 " used" }')

# Check the partitions for free space

#######################################
# Raise an alert for every partition at or above the usage threshold.
# Input:   lines of "<use%> <mount point>" on stdin
# Globals: hdalert, EMAIL, UEMAIL, logp, logn (all read)
# Outputs: per alert, one WARNING line on stdout, one marker line appended
#          to $logp/$logn, and one mail each to $EMAIL and $UEMAIL
#######################################
diskc() {
  local hdcheck partition
  while read -r hdcheck partition; do
    # Drop the trailing percent sign, keeping only the number.
    hdcheck=${hdcheck%%\%*}
    # Skip rows without a numeric usage figure (e.g. "-"); they would make
    # the integer comparison below fail with an error.
    case "$hdcheck" in
      '' | *[!0-9]*) continue ;;
    esac
    if [ "$hdcheck" -ge "$hdalert" ]; then
      echo "Running out of space \"$partition ($hdcheck%)\" on server $(hostname), $(date)" | mail -s "load Monitor  Almost out of disk space $hdcheck%" "$EMAIL"
      echo "Running out of space \"$partition ($hdcheck%)\" on server $(hostname), $(date)" | mail -s "loadMonitor  Almost out of disk space $hdcheck%" "$UEMAIL"
      echo "Disk Space Warning triggered" >> "$logp/$logn"
      echo "WARNING: $partition is at $hdcheck% usage – Notification sent"
    fi
  done
}

if [ "$hdstatus" == "1" ]; then
  # -P keeps every filesystem on a single line, so the usage and mount point
  # columns stay in fields 5 and 6 even for long device names.
  df -P | grep -vE "^Filesystem|tmpfs|cdrom" | awk '{ print $5 " " $6 }' | diskc
else
  echo Hard Drive monitoring is disabled
fi

#Check log directory
# Abort with a mail to $EMAIL when the log directory is missing.
# "$logp" has to be quoted: with an empty value an unquoted `[ -d ]` becomes
# a one-argument test, which succeeds and would let the script carry on.
if [ -d "$logp" ]; then
  echo Looks like we can create our logs
else
  echo "$logp does not exist. Please correct your log directory setting or create the directory"
  echo "load monitor – I found an error in your configurations. $logp does not exist. Please correct your log directory setting or create the directory" | mail -s "load Monitor Failed" "$EMAIL"
  exit 1
fi

# Required to function
TRUE="1"

#load average statement as shown from uptime command
FTEXT='load average:'

#Which load averages are we monitoring?
# $lavg selects one of the three comma-separated figures that uptime prints
# after $FTEXT: 5 -> first, 10 -> second, 15 -> third. Any other value is a
# configuration error: it is reported on stdout and by mail, then the script
# stops.
case "$lavg" in
  5) lavg_field=1 ;;
  10) lavg_field=2 ;;
  15) lavg_field=3 ;;
  *)
    echo "You entered in $lavg for the Load Average value. Please choose a value of 5, 10, or 15 (as minutes)"
    echo "load monitor – I found an error in your configurations. You entered in $lavg for the Load Average value. Please choose a value of 5, 10, or 15 (as minutes)" | mail -s "load Monitor Failed" "$EMAIL"
    exit 1
    ;;
esac

# The monitored figure; it keeps the leading blank that follows the marker.
loadc="$(uptime | awk -F "$FTEXT" '{ print $2 }' | cut -d, -f"$lavg_field")"

#Getting load averages

#######################################
# Print one figure from the load-average list of an uptime line.
# Input:     uptime(1) output on stdin
# Arguments: $1 - 1-based position in the comma-separated list
# Globals:   FTEXT (read) - marker text that precedes the list
# Outputs:   the selected figure, including the blank that follows the
#            previous separator (e.g. " 0.20")
#######################################
load_avg_field() {
  awk -F "$FTEXT" '{ print $2 }' | cut -d, -f"$1"
}

# Take a single uptime sample so the three figures belong together.
load_line="$(uptime)"
F5M="$(printf '%s\n' "$load_line" | load_avg_field 1)"
F10M="$(printf '%s\n' "$load_line" | load_avg_field 2)"
F15M="$(printf '%s\n' "$load_line" | load_avg_field 3)"

# Create the email data

#######################################
# Print the status report for this run.
# Globals: date, ut, F5M, F10M, F15M, rtotalram, rfreeram, swap, space,
#          pmon, plogp, plogn, mmon, mlogp, mlogn (all read)
# Outputs: the report on stdout, starting with an empty line
#######################################
write_report() {
  echo ""
  # Disabled by the original author:
  #if [ "$lavc > $NOTIFY" ]; then
  # echo "load average is beyond $NOTIFY"
  #fi
  echo "———————-"
  echo "$(hostname)"
  echo "$date:"
  echo "Uptime: $ut"
  echo "Load: $F5M,$F10M,$F15M"
  echo "Ram: $rtotalram total, $rfreeram free, swap: $swap used"
  echo "$space"
  if [ "$pmon" == yes ]; then
    echo "Process log created – $plogp/$plogn"
  else
    echo "Process tracking is disabled"
  fi

  if [ "$mmon" == yes ]; then
    echo "MySQL log created – $mlogp/$mlogn"
  else
    echo "MySQL tracking is disabled"
  fi
}

# Append the whole report to this run's log with a single redirection.
write_report >> "$logp/$logn"

#Gathering the most recent checks ($emailc of them) to be emailed on the event of server overload

#######################################
# Print the contents of the newest *.log files of a directory, newest first.
# "Newest" is decided by file name: the names start with a month-day-hour
# stamp, so the pathname expansion order is also the chronological order.
# Arguments: $1 - directory to read
#            $2 - maximum number of files to print
# Outputs:   the concatenated file contents on stdout
#######################################
collect_recent_logs() {
  local dir=$1 count=${2:-0} f i
  local -a logs=()
  # Expand the pattern in the shell rather than parsing ls output; the -f
  # test also discards the literal pattern when nothing matches.
  for f in "$dir"/*.log; do
    [ -f "$f" ] && logs+=("$f")
  done
  for ((i = ${#logs[@]} - 1; i >= 0 && count > 0; i--, count--)); do
    cat -- "${logs[i]}"
  done
}

collect_recent_logs "$logp" "$emailc" > "$loge"

#######################################
# Compare a load figure with a limit.
# awk does the comparison so fractional values (1.5, 10.2, ...) work
# without needing bc to be installed.
# Arguments: $1 - load figure (surrounding blanks are fine)
#            $2 - limit
# Outputs:   1 when load is strictly greater than limit, otherwise 0
#######################################
load_exceeds() {
  awk -v load="$1" -v limit="$2" 'BEGIN { print ((load + 0 > limit + 0) ? 1 : 0) }'
}

# Check the server loads
RESULT=$(load_exceeds "$loadc" "$NOTIFY")

# If exceeded, send normal email alert
if [ "$RESULT" == "1" ]; then
  mail -s "Warning Alert $(hostname) load average" "$EMAIL" < "$loge"

  echo Email notification has been triggered.

  #--Setting-UP--Process--Log--

  if [ "$mmon" == yes ]; then
    echo "logging mysql"
    mysqladmin proc >> "$mlogp/$mlogn"
  fi

  if [ "$pmon" == yes ]; then
    echo Gathering Process logs
    if [ -d "$plogp" ]; then
      uptime >> "$plogp/$plogn"
      echo >> "$plogp/$plogn"
      ps auxf >> "$plogp/$plogn"
    else
      echo "$plogp does not exist…Cannot log processing"
      echo
      echo "Would you like for me to create $plogp for you? (yes/no)"
      # With no answer available on stdin the read fails and the answer
      # stays empty, which takes the "disabled" branch below.
      read -r plogpcreate

      if [ "$plogpcreate" == yes ]; then
        mkdir "$plogp"
      else
        echo Process logging is disabled in the configs
      fi
    fi

    if [ "$plogd" == "0" ]; then
      echo "Process Log removal has been disabled in the configurations ($plogd)"
    else
      # Retention for process logs is $plogd, the same value that is tested
      # above and reported below.
      find "$plogp" -mtime +"$plogd" -type f -exec rm -f {} +
      echo "We are removing the Process logs that are older than $plogd days"
    fi
  fi

  #--END-PS
fi

# Check the server loads for urgent
URESULT=$(load_exceeds "$loadc" "$UNOTIFY")

# If Urgent threshold is exceeded, send Urgent email alert
if [ "$URESULT" == "1" ]; then
  printf '%s\n' "$UTEXT" | mail -s "$USUBJECT" "$UEMAIL"
  echo  Urgent notification has been triggered.
fi

#######################################
# Remove old log files, or report that removal is switched off.
# find descends into sub-directories, so files in directories below DIR are
# subject to the same cutoff.
# Arguments: $1 - directory to clean
#            $2 - age limit in days; 0 disables removal
# Outputs:   one status line on stdout
# Returns:   1 (nothing removed) when $1 is not a directory
#######################################
purge_old_logs() {
  local dir=$1 days=$2
  if [ "$days" == "0" ]; then
    echo "Log removal has been disabled in the configurations ($days)"
    return 0
  fi
  # Never hand find an empty or missing start point.
  if [ ! -d "$dir" ]; then
    printf 'Cannot remove old logs: %s is not a directory\n' "$dir" >&2
    return 1
  fi
  find "$dir" -mtime +"$days" -type f -exec rm -f {} +
  echo "We are removing the load logs that are older than $days days"
}

# Checking if we should remove old log files or if removal is disabled
purge_old_logs "$logp" "$logd"
=====================================================================

  1. Leave a comment

Leave a comment