#!/bin/bash
#
# Bring up/down openibd
#
# chkconfig: 2345 05 95
# description: Activates/Deactivates InfiniBand Subnet Manager
#
### BEGIN INIT INFO
# Provides:       openibd
### END INIT INFO
#
# Copyright (c) 2006 Mellanox Technologies. All rights reserved.
#
# This Software is licensed under one of the following licenses:
#
# 1) under the terms of the "Common Public License 1.0" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/cpl.php.
#
# 2) under the terms of the "The BSD License" a copy of which is
#    available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/bsd-license.php.
#
# 3) under the terms of the "GNU General Public License (GPL) Version 2" a
#    copy of which is available from the Open Source Initiative, see
#    http://www.opensource.org/licenses/gpl-license.php.
#
# Licensee has the right to choose one of the above licenses.
#
# Redistributions of source code must retain the above copyright
# notice and one of the license notices.
#
# Redistributions in binary form must reproduce both the above copyright
# notice, one of the license notices in the documentation
# and/or other materials provided with the distribution.
#
#
#  $Id: openib-1.0-openibd.init,v 1.3 2006/06/16 15:52:53 dledford Exp $
#

# config: /etc/sysconfig/openib.conf
# Locations of the OpenIB settings file and of the per-interface ifcfg-* files.
CONFIG=/etc/sysconfig/openib.conf
NETWORK_CONF_DIR="/etc/sysconfig/network-scripts"

# Without a settings file there is nothing to do: leave quietly.
[[ -f "$CONFIG" ]] || exit 0

# Read the settings first, then the init-script helper library.
source "$CONFIG"
source /etc/rc.d/init.d/functions

# Count InfiniBand ports by scanning `lspci -n` for known vendor-15b3 device
# IDs.  The total is handed back as the function's exit status, so the caller
# reads it from $? immediately after the call.
count_ib_ports()
{
    local pci_listing
    local arbel_devs sinai_devs tavor_devs

    pci_listing=$(/sbin/lspci -n)

    # Arbel mode: two ports per device
    arbel_devs=$(printf '%s\n' "$pci_listing" | grep -c "15b3:6282")

    # Sinai: one port per device
    sinai_devs=$(printf '%s\n' "$pci_listing" | grep -c -E "15b3:5e8c|15b3:6274")

    # Tavor mode: two ports per device
    tavor_devs=$(printf '%s\n' "$pci_listing" | grep -c -E "15b3:5a44|15b3:6278")

    return $(( 2*arbel_devs + sinai_devs + 2*tavor_devs ))
}

# Setting OpenIB start parameters
# POST_LOAD_MODULES collects the optional modules start() loads after the core
# ones; each is enabled by a <NAME>_LOAD="yes" setting from the config file.
POST_LOAD_MODULES=""

# SDP and kDAPL also switch IPoIB on.
if [[ "${SDP_LOAD}" == "yes" ]]; then
    POST_LOAD_MODULES="${POST_LOAD_MODULES} ib_sdp"
    IPOIB_LOAD="yes"
fi

if [[ "${KDAPL_LOAD}" == "yes" ]]; then
    POST_LOAD_MODULES="${POST_LOAD_MODULES} kdapl"
    IPOIB_LOAD="yes"
fi

# IPOIB is the numeric flag start() tests before loading ib_ipoib.
case "${IPOIB_LOAD}" in
    yes) IPOIB=1 ;;
    *)   IPOIB=0 ;;
esac

# Remaining switches, as "<setting>:<module>" pairs, in load order.
for _ib_opt in \
    SRP_LOAD:ib_srp \
    SRP_TARGET_LOAD:ib_srp_target \
    RDMA_CM_LOAD:rdma_cm \
    UCM_LOAD:ib_ucm \
    RDS_LOAD:ib_rds \
    RDMA_UCM_LOAD:rdma_ucm
do
    _ib_var=${_ib_opt%%:*}
    if [[ "${!_ib_var}" == "yes" ]]; then
        POST_LOAD_MODULES="${POST_LOAD_MODULES} ${_ib_opt#*:}"
    fi
done
unset _ib_opt _ib_var

# Modules stop() removes before the IPoIB interfaces are taken down.
PRE_UNLOAD_MODULES="ib_rds rdma_ucm rdma_cm ib_srp_target scsi_target ib_srp kdapl ib_sdp ib_ucm ib_cm"

# W/A for unloading modules
POST_UNLOAD_MODULES="${PRE_UNLOAD_MODULES} ib_ipoib ib_sa ib_uverbs ib_umad ib_mthca ib_ipath ipath_core ib_mad ib_core"

# Modules status() reports when they are loaded.
STATUS_MODULES="ib_cm ib_sdp ib_rds ib_srp ib_srp_target"

# count_ib_ports hands the port count back through its exit status.
count_ib_ports
ports_num=$?

# No ports found: nothing for this script to manage.
if [[ "$ports_num" -eq 0 ]]; then
    exit 0
fi

# One IPoIB interface name per port: " ib0 ib1 ..."
interfaces=""
i=0
while (( i < ports_num ))
do
    interfaces="${interfaces} ib${i}"
    i=$(( i + 1 ))
done

# Report whether $1 appears as a whole word in the lsmod listing.
# The exit status is grep's: 0 when found, non-zero otherwise.
is_module()
{
    /sbin/lsmod | grep -w "$1" &> /dev/null
}

# Print software and firmware version details to stdout.
#
# Output:  the contents of /usr/ofed/BUILD_ID when that file is readable,
#          then, for every vendor-15b3 PCI slot that lspci lists (lines
#          containing "5a46" are skipped), a firmware version and build date
#          decoded from four registers read with the `mread` tool.
# Returns: 1 when no executable `mread` can be found; otherwise the status of
#          the last command run.
get_sw_fw_info()
{
    local OFEDHOME="/usr/ofed"
    local MREAD
    local vendor slots mst_device
    local major subminor__minor subminor__minor1 subminor__minor2
    local ftime fdate day month year hour min sec

    MREAD=$(command -v mread 2> /dev/null)

    # Get OFED Build id
    if [ -r "${OFEDHOME}/BUILD_ID" ]; then
        echo  "Software"
        echo  "-------------------------------------"
        printf "Build ID:\n"
        cat "${OFEDHOME}/BUILD_ID"
        echo  "-------------------------------------"
    fi

    # Get FW version
    # The quotes matter: with an empty $MREAD an unquoted test collapses to
    # "[ ! -x ]", which is false, and the loop below would then try to run
    # each PCI slot name as a command.
    if [ ! -x "${MREAD}" ]; then
        return 1
    fi

    vendor="15b3"
    slots=$(lspci -n -d "${vendor}:" 2> /dev/null | grep -v "5a46" | cut -d ' ' -f 1)
    for mst_device in $slots
    do
        # Each read keeps only what follows the first ':' of mread's output.
        major=$("$MREAD" "${mst_device}" 0x82478 | cut -d ':' -f 2)
        subminor__minor=$("$MREAD" "${mst_device}" 0x8247c | cut -d ':' -f 2)
        ftime=$("$MREAD" "${mst_device}" 0x82480 | cut -d ':' -f 2)
        fdate=$("$MREAD" "${mst_device}" 0x82484 | cut -d ':' -f 2)

        # The expansions below are left unquoted on purpose: word splitting
        # strips the blanks around the value before the text after the 'x'
        # is sliced by byte position.
        major=$(echo -n $major | cut -d x -f 2 | cut -b 4)
        subminor__minor1=$(echo -n $subminor__minor | cut -d x -f 2 | cut -b 3,4)
        subminor__minor2=$(echo -n $subminor__minor | cut -d x -f 2 | cut -b 5,6,7,8)
        echo
        echo "Device ${mst_device} Info:"
        echo "Firmware:"

        printf "\tVersion:"
        printf '\t%s.%s.%s\n' "$major" "$subminor__minor1" "$subminor__minor2"

        day=$(echo -n $fdate | cut -d x -f 2 | cut -b 7,8)
        month=$(echo -n $fdate | cut -d x -f 2 | cut -b 5,6)
        year=$(echo -n $fdate | cut -d x -f 2 | cut -b 1,2,3,4)
        hour=$(echo -n $ftime | cut -d x -f 2 | cut -b 5,6)
        min=$(echo -n $ftime | cut -d x -f 2 | cut -b 3,4)
        sec=$(echo -n $ftime | cut -d x -f 2 | cut -b 1,2)

        printf "\tDate:"
        printf '\t%s/%s/%s %s:%s:%s\n' "$day" "$month" "$year" "$hour" "$min" "$sec"
    done
}

# Create debug info
# Writes a fresh report to /tmp/ib_debug_info.log (host facts, the output of
# get_sw_fw_info, PCI and module listings, kernel and system log excerpts, the
# process table) and then tells the user where to attach it.
get_debug_info()
{
    # Ignore these signals from here on so the report is written out in full.
    trap '' 2 9 15

    DEBUG_INFO=/tmp/ib_debug_info.log
    /bin/rm -f "$DEBUG_INFO"
    touch "$DEBUG_INFO"

    # Everything printed inside this group is appended to the report file.
    {
        echo "Hostname: $(hostname -s)"
        test -e /etc/issue && echo "OS: $(cat /etc/issue)"
        echo "Current kernel: $(uname -r)"
        echo "Architecture: $(uname -m)"
        echo "GCC version: $(gcc --version)"
        echo "CPU: $(cat /proc/cpuinfo | /bin/grep -E "model name|arch" | head -1)"
        echo "$(cat /proc/meminfo | /bin/grep "MemTotal")"
        echo "Chipset: $(/sbin/lspci | head -1 | cut -d ':' -f 2-)"

        echo
        get_sw_fw_info
        echo

        echo
        echo "############# LSPCI ##############"
        /sbin/lspci

        echo
        echo "############# LSPCI -N ##############"
        /sbin/lspci -n

        echo
        echo "############# LSMOD ##############"
        /sbin/lsmod

        echo
        echo "############# DMESG ##############"
        /bin/dmesg

        echo
        echo "############# Messages ##############"
        tail -50 /var/log/messages

        echo
        echo "############# Running Processes ##############"
        /bin/ps -ef
        echo "##############################################"
    } >> "$DEBUG_INFO"

    echo
    echo "Please open an issue in the http://openib.org/bugzilla and attach $DEBUG_INFO"
    echo
}

# Check AMD chipset issue Errata #58
#
# Acts only when lspci and setpci are executable and lspci lists all three of
# 1022:1100, 1022:7450 and 15b3:5a46.  In that case:
#   - a FIX_AMD_8131_ERR58="YES" default is appended to $CONFIG unless the
#     name already occurs there;
#   - register 69 of the 1022:1100 devices is read, and for a value other than
#     "c0" the register is written with c0 when FIX_AMD_8131_ERR58 is "YES";
#     otherwise the script exits with status 1.
# Returns 0 on every path that does not exit.
errata_58()
{
    local reg_val

    test -x /sbin/lspci && test -x /sbin/setpci || return 0

    /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null || return 0
    /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null || return 0
    /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null || return 0

    # First encounter with this hardware: record the switch in the config file.
    if ! grep FIX_AMD_8131_ERR58 "$CONFIG" > /dev/null; then
        {
            echo
            echo "# Set FIX_AMD_8131_ERR58=YES to apply AMD-8131 Errata #58 workaround"
            echo "FIX_AMD_8131_ERR58=\"YES\""
        } >> "$CONFIG"
    fi

    # Set default value for FIX_AMD_8131_ERR58
    : "${FIX_AMD_8131_ERR58:=YES}"

    for reg_val in $(/sbin/setpci -d 1022:1100 69)
    do
        # Already at the wanted value: look at the next one.
        [ "${reg_val}" == "c0" ] && continue

        if [ "$FIX_AMD_8131_ERR58" != "YES" ]; then
            echo
            echo "Invalid configuration found for PCI-X chipset AMD-8131 (Errata #58)"
            echo "For more details see IBG2 Distribution Release Notes (IBG2_Release_Notes.pdf under docs directory)"
            echo
            exit 1
        fi

        # The write addresses every 1022:1100 device, so one success is enough.
        if /sbin/setpci -d 1022:1100 69=c0; then
            # echo "AMD-8131 Errata #58 workaround applied"
            break
        fi
        echo "Failed to apply AMD-8131 Errata #58 workaround"
    done

    return 0
}

errata_56()
{
# Check AMD chipset issue Errata #56
#
# Does nothing unless /sbin/lspci and /sbin/setpci are executable and lspci
# lists all three of 1022:1100, 1022:7450 and 15b3:5a46.  Otherwise it:
#   - appends a FIX_AMD_8131_ERR56="YES" default to $CONFIG when that name
#     does not occur in the file yet;
#   - walks every 1022:7450 device and, for each matching 15b3:5a46 entry
#     found on the bus read from that device, either writes the workaround
#     register (FIX_AMD_8131_ERR56 is "YES") or prints an explanation and
#     exits the script with status 1.
# Sets the globals bus, rev, num (and FIX_AMD_8131_ERR56 when it was empty).
if test -x /sbin/lspci && test -x /sbin/setpci; then
     if ( /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null ) &&
        ( /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null ) &&
	( /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null ); then

        # Record the switch in the config file the first time this hardware
        # combination is seen.
        if ! ( grep FIX_AMD_8131_ERR56 $CONFIG > /dev/null ); then
            echo >> $CONFIG
            echo "# Set FIX_AMD_8131_ERR56=YES to apply AMD-8131 Errata #56 workaround" >> $CONFIG
            echo "FIX_AMD_8131_ERR56=\"YES\"" >> $CONFIG
        fi

        # Set default value for FIX_AMD_8131_ERR56
        if [ -z "$FIX_AMD_8131_ERR56" ]; then
                FIX_AMD_8131_ERR56="YES"
        fi

	bus=""

	# Look for AMD-8131 devices (1022:7450); keep the first two
	# ':'-separated fields of each verbose setpci line as the device address.
	for amd_dev in `/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2`
	do

		# NOTE(review): register 0x19 is presumably the bridge's secondary
		# bus number and register 0x8 its revision ID -- confirm against
		# the PCI configuration header layout.
		bus=`/sbin/setpci -s $amd_dev 19`
		rev=`/sbin/setpci -s $amd_dev 8`


		# Look for a Tavor (15b3:5a46) attached to the secondary bus of
		# this device; the loop body runs once per word setpci prints.
		for device in `/sbin/setpci -f -s $bus: -d 15b3:5a46 19`
		do
			# NOTE(review): $rev comes straight from setpci, which is
			# believed to print hexadecimal.  It is compared here as a
			# decimal integer, so a value containing a-f would make the
			# test fail with an error and take the "continue" branch --
			# confirm the intended revision cut-off.
			if [ $rev -lt 13 ]; then
				if [ "$FIX_AMD_8131_ERR56" == "YES" ]; then			
					# NOTE(review): the write addresses 15b3:5a44 while
					# detection above matched 15b3:5a46 -- confirm this
					# is the intended target.
					/sbin/setpci -d 15b3:5a44 72=14
					if [ $? -eq 0 ]; then
		                        	# echo "AMD-8131 Errata #56 workaround applied"
						# Leaves the inner (per-device) loop only; the
						# outer loop goes on to the next AMD-8131.
			                        break
			                else
                        			echo "Failed to apply AMD-8131 Errata #56 workaround"
			                fi
                		else
		                	echo
			                echo "Invalid configuration found for PCI-X chipset AMD-8131 (Errata #56)"
                    			echo "For more details see IBG2 Distribution Release Notes (IBG2_Release_Notes.pdf under docs directory)"
                    			echo

					# Terminates the whole script, not just this function.
		        	        exit 1
				fi

			else
				continue
			fi

			# Only reached after a failed write (success breaks out, the
			# other paths exit or continue).
			# If more than one device is on the bus then issue a
			# warning
			num=`/sbin/setpci -f -s $bus: 0 | wc -l |  sed 's/\ *//g'`

			if [ $num -gt 1 ]; then
				echo "Warning: your current PCI-X configuration might be incorrect."
				echo "see AMD-8131 Errata 56 for more details."
			fi

		done
	done

     fi
fi
}

# Load the OpenIB kernel modules.
#
# Order of work: ib_ipath (only when ipath_core is already loaded), the
# Errata #58 check, node descriptions in sysfs, the core modules
# (ib_umad, ib_uverbs and, when IPOIB is 1, ib_ipoib), the modules named in
# POST_LOAD_MODULES, and finally the Errata #56 check.
# A failure among the core modules prints the failure marker, writes the
# debug report and exits 1.  Otherwise the accumulated modprobe statuses are
# returned (0 means everything loaded).
start()
{
    local RC=0
    local sysfs_root hca_path core_mod extra_mod extra_rc
    local -i hca_index=1

    if is_module ipath_core ; then
        /sbin/modprobe ib_ipath > /dev/null 2>&1
        RC=$(( RC + $? ))
    fi

    errata_58

    # Add node description to sysfs
    sysfs_root="/sys/class/infiniband"
    if [ -d "${sysfs_root}" ]; then
        for hca_path in "${sysfs_root}"/*
        do
            if [ -e "${hca_path}/node_desc" ]; then
                echo -n "$(hostname -s) HCA-${hca_index}" >> "${hca_path}/node_desc"
            fi
            # Every entry consumes a number, with or without a node_desc file.
            hca_index=$(( hca_index + 1 ))
        done
    fi

    echo -n "Loading OpenIB kernel modules"
    for core_mod in ib_umad ib_uverbs
    do
        if ! is_module "$core_mod" ; then
            /sbin/modprobe "$core_mod" > /dev/null 2>&1
            RC=$(( RC + $? ))
        fi
    done

    if ! is_module ib_ipoib && [ "$IPOIB" -eq 1 ]; then
        /sbin/modprobe ib_ipoib > /dev/null 2>&1
        RC=$(( RC + $? ))
    fi

    # Any failure up to this point is fatal.
    if [ $RC -ne 0 ]; then
        echo_failure
        echo
        get_debug_info
        exit 1
    fi

    # Load configured modules; a failure here is reported but not fatal.
    for extra_mod in $POST_LOAD_MODULES
    do
        is_module $extra_mod && continue

        /sbin/modprobe $extra_mod > /dev/null 2>&1
        extra_rc=$?
        if [ $extra_rc -ne 0 ]; then
            echo_failure
            echo
            echo "Failed to load module $extra_mod"
        fi
        RC=$(( extra_rc + RC ))
    done

    if [ $RC -eq 0 ]; then
        echo_success
        echo
    fi

    errata_56
    sleep 1
    return $RC
}

# Unload kernel module $1 if it is currently loaded.
#
# Arguments: $1 - module name
#            $2 - optional extra message printed when unloading fails
# Returns:   0 when the module is not loaded or was removed.
# Exits:     1 (after printing the failure marker and writing the debug
#            report) when neither modprobe -r nor rmmod can remove it.
unload()
{
    local mod=$1

    # A module that is not loaded needs no work.  The check used to be
    # inverted, so loaded modules were skipped and absent ones "removed".
    if ! is_module "$mod"; then
        return 0
    fi

    if /sbin/modprobe -r "$mod" > /dev/null 2>&1; then
        return 0
    fi

    # Try rmmod if modprobe failed: case that previous installation included more IB modules.
    if /sbin/rmmod "$mod" > /dev/null 2>&1; then
        return 0
    fi

    echo_failure $"Failed to unload $mod"
    get_debug_info
    if [ -n "$2" ]; then
        echo "$2"
    fi
    exit 1
}

# Unload the OpenIB stack.
#
# Steps: refuse to run while a known InfiniBand application is in the process
# list; unload PRE_UNLOAD_MODULES; kill any srp_persistent* script together
# with its sleep children; take the IPoIB interfaces down; unload
# POST_UNLOAD_MODULES.
#
# Globals read: PRE_UNLOAD_MODULES, POST_UNLOAD_MODULES, interfaces, ACTION,
#               RESTART (an unset value is treated as 0).
# Returns: 0 on success, or when the stack is not loaded during a restart.
# Exits:   1 when an application is still running, or when the stack is not
#          loaded and this is not a restart; unload() may also exit 1.
stop()
{
    local RC=0

    # Check if applications which use infiniband are running
    local apps="opensm osmtest ibbs ibns"
    local app mod i
    local PID_SCRPT_TO_KILL script_pid sleep_pids

    for app in $apps
    do
        if ( ps -ef | grep $app | grep -v grep > /dev/null 2>&1 ); then
            echo
            echo "Please stop $app and all applications running over InfiniBand"
            echo "Then run \"$0 $ACTION\""
            echo
            exit 1
        fi
    done

    if ! is_module ib_core; then
        # RESTART is only assigned on the restart path.  Defaulting it here
        # keeps a plain "stop" from hitting a test syntax error and silently
        # taking the restart branch.
        if [ "${RESTART:-0}" -eq 0 ]; then
            echo
            echo_failure $"HCA driver is not loaded or loaded with errors"
            echo
            exit 1
        fi
        return 0
    fi

    # Unload ULPs modules
    for mod in $PRE_UNLOAD_MODULES
    do
        unload $mod
    done

    # Remove srp_persistent_bind.sh before removing ib_srp module
    PID_SCRPT_TO_KILL=$(ps -efww | grep srp_persistent | grep -v grep | awk '{print $2}')
    for script_pid in $PID_SCRPT_TO_KILL
    do
        # Only sleep processes whose parent PID (third column of ps -ef) is
        # the script count as its helpers.  A plain grep for the number would
        # also hit unrelated lines that merely contain it.
        sleep_pids=$(ps -efww | awk -v ppid="$script_pid" '$3 == ppid && /sleep/ { print $2 }')
        kill -9 "$script_pid"
        if [ -n "$sleep_pids" ]; then
            kill -9 $sleep_pids
        fi
    done

    # Unload IPoIB interfaces
    if is_module ib_ipoib; then
        for i in $interfaces
        do
            # Best effort: the result of ifdown is deliberately not checked.
            ifdown ${i} > /dev/null 2>&1
        done
        sleep 2
    fi

    # Unload OpenIB modules
    for mod in $POST_UNLOAD_MODULES
    do
        unload $mod
    done

    sleep 1
    return $RC
}

# Print the state of the OpenIB stack: whether one of the HCA drivers
# (ib_mthca, ib_ipath, ib_ehca) is loaded, the configured IPoIB interfaces
# and which of them are up (only while ib_ipoib is loaded), and which of the
# STATUS_MODULES are loaded.  Returns the status of the last "active device"
# echo, or 0 when none was printed.
status()
{
    local RC=0
    local extra_seen=0
    local i mod

    echo
    if is_module ib_mthca || is_module ib_ipath || is_module ib_ehca; then
        echo "  HCA driver loaded"
    else
        echo $"HCA driver is not loaded"
    fi
    echo

    if is_module ib_ipoib; then
        echo $"Configured devices:"
        echo $interfaces
        echo
        echo $"Currently active devices:"

        for i in $interfaces
        do
            # Interfaces without an ifcfg file are not reported.
            [[ -e ${NETWORK_CONF_DIR}/ifcfg-${i} ]] || continue

            # Unquoted on purpose: prints the name when the link is UP and
            # an empty line otherwise.
            echo $(/sbin/ip -o link show $i | awk -F ": " '/UP>/ { print $2 }')
            RC=$?
        done
    fi

    echo

    for mod in $STATUS_MODULES
    do
        is_module $mod || continue

        # The heading goes out once, just before the first loaded module.
        if (( extra_seen == 0 )); then
            echo "The following modules are also loaded:"
            echo
        fi
        extra_seen=$(( extra_seen + 1 ))
        echo "	$mod"
    done

    echo

    return $RC
}


# Overall result of the script and the epoch second at which it was started.
RC=0
start_time=$(date +%s | tr -d '[:space:]')

# Signal handler: prints a hint and lets the script carry on.
# When the action is "stop" and more than 5 seconds have passed since
# start_time, the hint says that applications probably still hold the
# modules; in every other case it just asks the user to wait.
trap_handler()
{
    local now elapsed

    now=$(date +%s | tr -d '[:space:]')
    elapsed=$(( now - start_time ))

    # Ask to wait for 5 seconds if trying to stop openibd
    if (( elapsed > 5 )) && [[ "$ACTION" == "stop" ]]; then
        printf "\nProbably some application are still using InfiniBand modules...\n"
        return 0
    fi

    printf "\nPlease wait ...\n"
    return 0
}

# On SIGINT (2) or SIGTERM (15) only print a hint and keep going.
# Signal 9 (SIGKILL) cannot be trapped, so it is not listed.
trap 'trap_handler' 2 15

# ACTION is read by stop() and trap_handler(); it was never assigned before,
# which left stop()'s "Then run ..." hint incomplete and made the "stop"
# branch of trap_handler unreachable.
ACTION=$1

# RESTART tells stop() that a stack which is not loaded is not an error.
RESTART=0

case "$ACTION" in
	start)
		start
		;;
	stop)
		stop
		;;
	restart)
		RESTART=1
		stop
		RESTART=0
		start
		;;
	status)
		status
		;;
	*)
		echo
		echo "Usage: $(basename "$0") {start|stop|restart|status}"
		echo
		exit 1
		;;
esac

# Exit with the status of the action that ran last.
RC=$?
exit $RC
