#!/bin/sh
#    mdevt - Handle md raid events from sgraidmon or "mdadm --follow"
#    Parameters
#       $1 = event     ("Fail", "Insert", "Save", or "Check") 
#       $2 = md dev    (/dev/md0; for Save this is the disk instead, e.g. /dev/sda)
#       $3 = disk part (/dev/sdb6, for Fail/Insert)
#       $4 = SCSI Target ID  (usu 0-6, for Fail/Insert)
#       $5 = iteration (for sgraidmon Insert only)
#
# sgraidmon is the only tool that detects insertion
# mdadm was formerly known as mdctl.
#
# Changes:
# 04/23/04 ARC: fixed alarmcmd if condition to ignore iteration
# 06/08/04 ARC: check for alarmcmd error if no IPMI driver
# 09/06/05 ARC: added sample snmptrap logic
# 04/13/07 ARC: changed disklist to use /proc/partitions instead of fdisk
# 08/03/11 ARC: changed alarmcmd from alarms to ialarms
#
# ---- File locations -------------------------------------------------------
# pdir/pfile: where "mdevt Save" keeps the sfdisk dump of the partition layout
pdir=/usr/share/scsirastools
pfile="${pdir}/sdf.save"
# log: event log appended to by every invocation
log=/var/log/mdevents
# tmpf: scratch file for alarmcmd output; waitf: generated remirror-wait script
# NOTE(review): both are fixed names under /tmp - confirm that is acceptable
tmpf=/tmp/mdevt.tmp
waitf=/tmp/waitmd.sh
# alarmcmd: utility used to read the platform type and drive the fault LEDs
alarmcmd=/usr/sbin/ialarms

# ---- SNMP trap settings ---------------------------------------------------
# set dosnmptrap to 1 if you want to send snmp traps
dosnmptrap=0
# set snmpipadr to the snmp enterprise server destination
snmpipadr=127.0.0.1
# These sample SNMP Trap OIDs show up as Adaptec SCSI traps (see iommib.mib).
# Use other OIDs if you wish.
oidroot="enterprises.795.12.1"
trapoid="${oidroot}.107"
vboid1=".${oidroot}.9000.9001"
vboid2=".${oidroot}.9000.9002"
vboid3=".${oidroot}.9000.9008"
vboid4=".${oidroot}.9000.9009"
vboid5=".${oidroot}.9000.9010"
# host name reported as the trap's agent
snmphost=$(uname -n)

# Give the positional parameters descriptive names for the event handlers.
# ($1, the event name, is used directly by the case statement.)
mdname=$2
sdname=$3
tgt=$4
iter=$5

# Append a separator and the full command line, time-stamped, to the event log
{
   echo "-------------------"
   echo "mdevt: $* `date`"
} >>$log
# write the event also to the syslog
logger -i "mdevt: $* `date`"
case "$1" in 
#####
##### Fail/Remove a disk partition, called from sgraidmon
#####
# Uses: $mdname (md device), $sdname (failed partition), $tgt (SCSI target id).
# Marks the partition faulty, turns the disk fault LED on, optionally sends
# an SNMP trap, and finally removes the partition from the array.
"Fail")
   # record the array state as it was when the failure was reported
   cat /proc/mdstat           >>$log
   # make sure the drive is marked offline/faulty
   echo "mdadm $mdname -f $sdname"      >>$log
   mdadm $mdname -f $sdname             >>$log 2>&1
   # get the disk name from the partition device name
   # Strip the WHOLE trailing partition number (so /dev/sdb10 -> /dev/sdb,
   # not /dev/sdb0), otherwise the disk is never found in $disklist below.
   if [ -d /dev/scsi/host0 ] || [ -d /dev/scsi/host1 ]
   then
        # devfs naming: .../partN -> .../disc
        idisk=`echo "$sdname" |sed -e 's/part[0-9]*$/disc/'`
        disklist=`cat /proc/partitions |awk '{print $4}' |grep disc`
   else
        idisk=`echo "$sdname" |sed -e 's/[0-9]*$//'`
        # whole disks are the sd* names without any digit; the pattern is
        # quoted so the shell cannot glob-expand it against the current dir
        disklist=`cat /proc/partitions |awk '{print $4}' |grep sd |grep -v '[0-9]'`
   fi
   # Query the alarm utility; its output tells us if this is an mBMC platform
   $alarmcmd >$tmpf 2>/dev/null
   if [ $? -ne 0 ]
   then
	echo "$alarmcmd error, make sure ipmiutil is installed." >>$log
   fi
   # get the disk index (usu 0 or 1) from the disk name
   #disklist="`fdisk -l |grep "^Disk " |cut -f1 -d':' |sed -e 's/Disk //'`"
   # i ends up as the position of $idisk in $disklist, or the number of
   # disks listed if it was not found
   i=0
   for d in $disklist
   do
	if [ "$idisk" = "/dev/$d" ]
	then
		break
	else
		i=`expr $i + 1`
	fi
   done
   # Set the disk fault LED
   grep "mBMC" $tmpf >/dev/null
   if [ $? -eq 0 ] 
   then
	# mBMC platform: -a1 for disk index 0, -b1 for any other disk
	echo "Setting Fault LED for Disk $i" >>$log
	if [ $i -eq 0 ]
	then
	   $alarmcmd -a1  >>$log 2>&1
	else
	   $alarmcmd -b1  >>$log 2>&1
	fi
   else
        # other platforms: the LED is selected by (SCSI target id - 1)
        tgt=`expr $tgt - 1`
        echo "Setting Fault LED for Disk $tgt" >>$log
        $alarmcmd -d${tgt}1  >>$log 2>&1
   fi
   if [ $dosnmptrap -eq 1 ]
   then
      # first field of /proc/uptime is passed as the trap timestamp
      uptim=`cat /proc/uptime |cut -f1 -d' '`
      snmpsev=5
      snmpfn=3
      echo "Sending SNMP trap to $snmpipadr for Disk $i failure" >>$log
      snmptrap -v 1 -c public $snmpipadr $trapoid $snmphost 6 0 $uptim $vboid1 i $snmpfn $vboid2 i $i $vboid3 i 7 $vboid4 i $i $vboid5 i $snmpsev >>$log 2>&1 
   fi 
   # remove the driver partition from the raid
   echo "raidhotremove $mdname $sdname" >>$log
   raidhotremove $mdname $sdname        >>$log 2>&1 
   ;;
#####
##### Insert a new disk partition, called from sgraidmon
#####
# Uses: $mdname (md device), $sdname (partition to add), $tgt (SCSI target
# id) and $iter (iteration number supplied by sgraidmon).
#   iter = 1 : additionally repartition the new disk from the saved layout
#   iter = 2 : additionally start a background script that waits for the
#              remirror to finish, then reinstalls the boot loader and
#              clears the disk fault LED
"Insert")
   # drop any stale membership of this partition before re-adding it
   echo "mdadm $mdname -f $sdname"      >>$log
   mdadm $mdname -f $sdname             >>$log 2>&1
   echo "raidhotremove $mdname $sdname" >>$log
   raidhotremove $mdname $sdname        >>$log 2>&1
   # Get the disk name from the partition device name, and the list of disks.
   # Strip the WHOLE trailing partition number (so /dev/sdb10 -> /dev/sdb,
   # not /dev/sdb0), otherwise the disk is never found in $disklist.
   if [ -d /dev/scsi/host0 ] || [ -d /dev/scsi/host1 ]
   then
      # devfs naming: .../partN -> .../disc
      idisk=`echo "$sdname" |sed -e 's/part[0-9]*$/disc/'`
      disklist=`cat /proc/partitions |awk '{print $4}' |grep disc`
   else
      idisk=`echo "$sdname" |sed -e 's/[0-9]*$//'`
      # the pattern is quoted so the shell cannot glob-expand it
      disklist=`cat /proc/partitions |awk '{print $4}' |grep sd |grep -v '[0-9]'`
   fi
   # get disk number (usu 0 or 1): position of $idisk in $disklist, or the
   # number of disks listed if it was not found
   #disklist="`fdisk -l |grep "^Disk " |cut -f1 -d':' |sed -e 's/Disk //'`"
   i=0
   for d in $disklist
   do
      if [ "$idisk" = "/dev/$d" ]
      then
         break
      fi
      i=`expr $i + 1`
   done
   # Need to set up partitions with sfdisk first
   if [ "$iter" = "1" ]
   then 
      # Only set up partitions the first time
      # Assume that the "Save" has been done earlier, e.g.:
      #   "mdevt Save /dev/sda" or "sfdisk -d /dev/sda >$pfile" 
      if [ -f $pfile ]
      then
         # repartition the disk: zero the first sector, then replay the layout
         echo "dd if=/dev/zero of=$idisk bs=512 count=1" >>$log
         dd if=/dev/zero of=$idisk bs=512 count=1
         echo "sfdisk --force $idisk <$pfile" >>$log  
         sfdisk --force  $idisk  <$pfile  2>&1 |tee -a $log
      else
         echo "*** ERROR *** $pfile does not exist, run mdevt Save" >>$log 
      fi
      if [ $dosnmptrap -eq 1 ]
      then
         # first field of /proc/uptime is passed as the trap timestamp
         uptim=`cat /proc/uptime |cut -f1 -d' '`
         snmpfn=2
         snmpsev=6
         echo "Sending SNMP trap to $snmpipadr for Disk $i insert" >>$log
         snmptrap -v 1 -c public $snmpipadr $trapoid $snmphost 6 0 $uptim $vboid1 i $snmpfn $vboid2 i $i $vboid3 i 7 $vboid4 i $i $vboid5 i $snmpsev >>$log 2>&1 
      fi 
   fi
   # flush each partition, in case old superblock was there
   #echo "dd if=/dev/zero of=$sdname bs=1024 count=1" >>$log
   #dd if=/dev/zero of=$sdname bs=1024 count=1 >>$log 2>&1
   #sync; sync
   echo "raidhotadd $mdname $sdname"    >>$log
   raidhotadd $mdname $sdname           >>$log 2>&1
   cat /proc/mdstat           >>$log
   if [ "$iter" = "2" ]
   then 
      # run lilo or grub after remirror is complete.
      tmpa=/tmp/grub.in
      # Generate the wait script.  The here-document is UNQUOTED: everything
      # written as $name is substituted now, by mdevt; everything written as
      # \$name or \`...\` is left for the generated script to evaluate when
      # it runs.  The disk number is passed in as a plain value (i=N).
      # NOTE(review): $waitf is a fixed name under /tmp that is then executed.
      cat - <<%%% >$waitf
#!/bin/sh
sleep 5
while [ 1 ]
do
   inprog=0
   cat /proc/mdstat |grep recovery  >/dev/null
   if [ \$? -eq 0 ]
   then
      inprog=1
   fi
   cat /proc/mdstat |grep resync  >/dev/null
   if [ \$? -eq 0 ]
   then
      inprog=1
   fi
   if [ \$inprog -eq 0 ]
   then
      echo "Remirror is complete." >>$log
      if [ -f /etc/lilo.conf ]
      then
         echo "Running lilo." >>$log
         lilo -V |grep 22
         if [ \$? -eq 0 ]
         then 
            # lilo -M is only valid if lilo >= v22
            lilo -M $idisk >>$log  2>&1
         fi
         lilo  >>$log  2>&1
      fi
      # disk name and disk number (usu 0 or 1) as determined by mdevt
      newdisk=$idisk
      i=$i
      # clear the disk fault LED if mBMC platform
      $alarmcmd >$tmpf 2>/dev/null
      if [ \$? -ne 0 ]
      then
         echo "$alarmcmd error, make sure ipmiutil is installed." >>$log
      fi
      grep "mBMC" $tmpf >/dev/null
      if [ \$? -eq 0 ] 
      then
         # set the disk fault LED off
         echo "Clearing Fault LED for Disk \$i" >>$log
         if [ \$i -eq 0 ]
         then
            $alarmcmd -a0  >>$log 2>&1
         else
            $alarmcmd -b0  >>$log 2>&1
         fi
      else
         # LED is selected by (SCSI target id - 1), same as when it was set
         tgt=\`expr $tgt - 1\`
         echo "Clearing Fault LED for Disk \$tgt" >>$log
         $alarmcmd -d\${tgt}0  >>$log 2>&1
      fi
      if [ -f /boot/grub/grub.conf ]
      then
         echo "Running grub." >>$log
         if [ \$i -eq 0 ]
         then
            j=1
         else
            j=0
         fi
         echo "grub: \$newdisk = hd\$i, other = hd\$j" >>$log
         # write the mbr & stage2 to the new disk from the other one
         cat - <<EOF >$tmpa

install (hd\$j,0)/grub/stage1 (hd\$i) (hd\$i,0)/grub/stage2 (hd\$j,0)/grub/grub.conf
quit
EOF
         grub <$tmpa  >>$log 2>&1
         echo "grub ret = \$?"  >>$log
      fi
      exit 0
   fi
   sleep 1
done
%%%
      chmod 755 $waitf
      # run the wait script in the background and record its pid
      $waitf &
      echo "wait_pid = $!" >>$log
   fi
   ;;
#####
##### Save partition layout - called at init time from sgraid 
#####
# For Save, $2 is the disk whose partition table is dumped to $pfile
# (e.g. "mdevt Save /dev/sda").  If grub is in use, the boot record is also
# written to the second disk.
"Save")
   sdname=$2
   mkdir -p $pdir
   echo "sfdisk -d $sdname >$pfile" >>$log
   # Dump into a scratch file first: redirecting straight into $pfile would
   # truncate a previously saved, good layout even when sfdisk fails.
   if sfdisk -d $sdname >$pfile.new && [ -s $pfile.new ]
   then
      mv -f $pfile.new $pfile
   else
      echo "*** ERROR *** sfdisk -d $sdname failed, $pfile not updated" >>$log
      rm -f $pfile.new
   fi
   # Some (like SuSE) have menu.lst but not grub.conf
   if [ ! -f /etc/lilo.conf ] && [ -f /boot/grub/menu.lst ]
   then
      if [ ! -f /boot/grub/grub.conf ]
      then
         # Make a grub.conf for later reference
         ln /boot/grub/menu.lst /boot/grub/grub.conf
      fi
   fi
   if [ -f /boot/grub/grub.conf ]
   then
      # Make sure the second disk also has a valid mbr
      # Note that this is a workaround for grub not handling raid 
      # devices,  More would have to be done for >2 disks also.
      # Write mbr & stage2 from hd $j to $i (1st partition)
      i=1
      j=0
      tmpa=/tmp/grub.in
      # the here-document terminator must stay in column 0
      cat - <<EOF >$tmpa

install (hd$j,0)/grub/stage1 (hd$i) (hd$i,0)/grub/stage2 (hd$j,0)/grub/grub.conf
quit
EOF
      echo "grub: writing mbr to hd$i" >>$log
      grub <$tmpa  >>$log 2>&1
      echo "grub ret = $?"  >>$log
   fi
   ;;
#####
##### Check if saved partition layout file is present
#####
# Exit status: 0 if $pfile exists, 255 if it does not.
"Check")
   if [ -f $pfile ]
   then
	echo "OK, saved partition layout is there ($pfile)." | tee -a $log
	exit 0
   else 
	echo "Saved partition layout is missing ($pfile)." | tee -a $log
	echo "Run '/etc/rc.d/init.d/sgraid start' to invoke mdevt Save" | tee -a $log
	# "exit -1" is not a valid operand for a POSIX sh (dash rejects it as
	# an illegal number); 255 is the status bash reports for -1.
	exit 255
   fi
   ;;
#####
##### Display Usage if invalid parameters
#####
# The first two lines go to both stdout and the log; the parameter
# descriptions go to stdout only.
*)
   echo "Invalid function: $0 $1 $2 $3 $4 $5" | tee -a $log
   echo "Usage: mdevt <event> [md dev] [disk] [scsi_id] [iteration]" | tee -a $log
   echo "       where parameters are:"
   echo "         1 = event     (Fail, Insert, Check, or Save)"
   # Save takes its disk as parameter 2, not parameter 3
   echo "         2 = md dev    (e.g. /dev/md0  - for Fail/Insert; for Save, the disk, e.g. /dev/sda)"
   echo "         3 = disk part (e.g. /dev/sdb6 - for Fail,Insert)"
   echo "         4 = SCSI ID   (e.g. 0,1,2  - for Fail,Insert)"
   echo "         5 = iteration (e.g. 1,2,3  - for Insert only)"
   ;;
esac
