diff options
author | Stewart Smith <stewart@linux.vnet.ibm.com> | 2015-04-29 13:57:23 +1000 |
---|---|---|
committer | Stewart Smith <stewart@linux.vnet.ibm.com> | 2015-04-29 13:57:23 +1000 |
commit | ca755775fe760f8f2e3673b7f0f3140cc816e8cd (patch) | |
tree | 38d3d084c66c8bf8a82829020d6508acb4e277cf /external | |
parent | 437b887183528a23399cc153bac0728cb635a298 (diff) | |
download | skiboot-ca755775fe760f8f2e3673b7f0f3140cc816e8cd.zip skiboot-ca755775fe760f8f2e3673b7f0f3140cc816e8cd.tar.gz skiboot-ca755775fe760f8f2e3673b7f0f3140cc816e8cd.tar.bz2 |
Add boot-tests scripts for automating boot testing on FSP and BMC systems
We have been using these scripts for a while in the lab. They are a good start for more
fully automated and autonomous boot testing.
From: Daniel Axtens <daxtens@au1.ibm.com>
From: Cyril Bur <cyril.bur@au1.ibm.com>
Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'external')
-rw-r--r-- | external/boot-tests/bmc_support.sh | 71 | ||||
-rwxr-xr-x | external/boot-tests/boot_test.sh | 259 | ||||
-rw-r--r-- | external/boot-tests/fsp_support.sh | 161 |
3 files changed, 491 insertions, 0 deletions
diff --git a/external/boot-tests/bmc_support.sh b/external/boot-tests/bmc_support.sh
new file mode 100644
index 0000000..03357ed
--- /dev/null
+++ b/external/boot-tests/bmc_support.sh
@@ -0,0 +1,71 @@
+# Maximum number of BOOT_SLEEP_PERIOD-long polls while waiting for power on
+BOOT_TIMEOUT="5";
+
+#Path to memboot binary
+#MEMBOOT=${MEMBOOT:-memboot};
+
+#Username/password for ssh to BMC machines
+SSHUSER=${SSHUSER:-sysadmin};
+export SSHPASS=${SSHPASS:-superuser};
+
+#Username/password for IPMI
+IPMI_AUTH="-U ${IPMI_USER:-admin} -P ${IPMI_PASS:-admin}"
+
+# Strip control characters from IPMI before grepping?
+STRIP_CONTROL=0
+
+# How do we SSH/SCP in? (sshpass -e takes the password from the exported SSHPASS)
+SSHCMD="sshpass -e ssh -l $SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $target";
+REMOTECPCMD="eval rsync -e \"sshpass -e ssh -l $SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \" ";
+
+function is_off {
+	[ "$($IPMI_COMMAND chassis power status)" = "Chassis Power is off" ]	# status of the test is the return value
+}
+
+function poweroff {
+	$IPMI_COMMAND chassis power off
+	# give it some time
+	sleep 10
+}
+
+function flash {
+	$REMOTECPCMD $PNOR $target:/tmp/image.pnor;	# unquoted on purpose: REMOTECPCMD starts with eval
+	if [ "$?" -ne "0" ] ; then
+		error "Couldn't copy firmware image";
+	fi
+
+	# Habanero doesn't have md5sum
+	#flash_md5=$(md5sum "$1" | cut -f 1 -d ' ');
+	#$SSHCMD "flash_md5r=\$(md5sum /tmp/image.pnor | cut -f 1 -d ' ');
+	#	if [ \"$flash_md5\" != \"\$flash_md5r\" ] ; then
+	#		exit 1;
+	#	fi";
+	#if [ "$?" -ne "0" ] ; then
+	#	error "Firmware MD5s don't match";
+	#fi
+
+	# flash it
+	msg "Flashing PNOR"
+	$SSHCMD "/usr/local/bin/pflash -E -f -p /tmp/image.pnor"
+	if [ "$?" -ne "0" ] ; then
+		error "An unexpected pflash error has occured";
+	fi
+}
+
+function boot_firmware {
+	$IPMI_COMMAND chassis power on > /dev/null;
+	i=0;
+	while is_off && [ "$i" -lt "$BOOT_TIMEOUT" ] ; do
+		msg -n ".";
+		sleep "$BOOT_SLEEP_PERIOD";
+		i=$((i + 1));
+	done
+	if is_off ; then	# re-check the chassis: powering on during the last sleep is a success
+		error "Couldn't power on $target";
+	fi
+}
+
+function machine_sanity_test {
+	# No further sanity tests for BMC machines.
+	true
+}
diff --git a/external/boot-tests/boot_test.sh b/external/boot-tests/boot_test.sh
new file mode 100755
index 0000000..09d0c27
--- /dev/null
+++ b/external/boot-tests/boot_test.sh
@@ -0,0 +1,259 @@
+#!/bin/bash
+# Lets try for /bin/sh but bashisms will sneak in.
+
+# partial bash strict mode
+set -uo pipefail
+
+V=0;
+
+if [ -f ~/.skiboot_boot_tests ]; then
+	source ~/.skiboot_boot_tests
+fi
+
+# Utility functions: error prints $1 to stderr and exits 1; msg echoes only when V != 0
+function error {
+	unset SSHPASS;
+	echo "$1" >&2;
+	exit 1;
+}
+
+function msg {
+	if [ "$V" -ne 0 ] ; then
+		echo "$@";
+	fi
+}
+
+# Generic conf
+BOOT_SLEEP_PERIOD=10
+FUNCTIONS_NEEDED="sshpass ssh ipmitool md5sum rsync expect";
+
+function linux_boot {
+	if [ "$STRIP_CONTROL" -eq 1 ]; then
+		STRIPCOMMAND="col -b -l 1"	# NOTE(review): STRIPCOMMAND is set here but not used below
+	else
+		STRIPCOMMAND="cat"
+	fi
+
+	#Everyone is going to forget to disconnect - force them off
+	ipmiresult=$($IPMI_COMMAND sol deactivate 2>&1);
+	retval=$?
+	if [ "$retval" -ne 0 ] && [ "$ipmiresult" != "Info: SOL payload already de-activated" ]; then
+		error "IPMI failed; it has probably stalled on the FSP."
+	fi
+
+	LINUXBOOT_LOG=$(mktemp --tmpdir builder-2.XXXXXX);
+	cat <<EOF | expect > "$LINUXBOOT_LOG"
+set timeout 300
+spawn $IPMI_COMMAND sol activate
+expect {
+timeout { send_user "\nTimeout waiting for petitboot\n"; exit 1 }
+eof { send_user "\nUnexpected EOF\n"; exit 1 }
+"Welcome to Petitboot"
+}
+
+close
+exit 0
+EOF
+	retval=$?
+	$IPMI_COMMAND sol deactivate > /dev/null;
+	if [ "$retval" -ne 0 ]; then
+		msg "Waiting for linux has timed out"
+		msg "Boot log follows:"
+		cat "$LINUXBOOT_LOG"
+		rm -f "$LINUXBOOT_LOG"
+		return 1
+	else
+		rm -f "$LINUXBOOT_LOG"
+		return 0
+	fi
+}
+
+function boot_test {
+	# The functions called (e.g. flash, boot) are from the *_support files
+	if [ "$bootonly" -ne 1 ]; then
+		msg "Flashing ${target}..."
+		flash "$@";
+	fi
+
+	msg "Booting $target..."
+	boot_firmware;
+	msg "firmware looks good, waiting for linux";
+
+	linux_boot;
+	if [ $? -ne 0 ] ; then
+		error "Couldn't reach petitboot on $target";
+	fi
+	msg "$target has booted";
+	unset SSHPASS;
+}
+
+function sanity_test {
+	$SSHCMD true;
+	if [ $? -ne 0 ]; then
+		echo "Failed to SSH to $target..."
+		echo "Command was: $SSHCMD true"
+		error "Try connecting manually to diagnose the issue."
+	fi
+
+	$IPMI_COMMAND chassis power status > /dev/null;
+	if [ $? -ne 0 ]; then
+		echo "Failed to connect to $target with IPMI..."
+		echo "Command was: $IPMI_COMMAND chassis power status"
+		error "Try connecting manually to diagnose the issue."
+	fi
+
+	# do further machine-type specific tests
+	machine_sanity_test
+}
+
+function usage {
+	cat <<EOF
+boot_test.sh tests the bootability of a given target, optionally after
+  flashing new firmware onto the target.
+
+There are three usage modes.
+
+1) boot_test.sh -h
+     Print this help
+
+2) boot_test.sh [-vdp] -t target -b (fsp|bmc)
+     Boot test the target without flashing. Specify the type of machine
+     (FSP or BMC) with the -b option.
+
+3) boot_test.sh [-vdp] -t target -P pnor
+   boot_test.sh [-vdp] -t target [-1 lid1] [-2 lid2] [-3 lid3]
+
+     Flash the given firmware before boot testing.
+
+     If -P is given, the file is assumed to be a PNOR for BMC based
+     flashing.
+
+     If -1/-2/-3 are given, the files are assumed to be lids for FSP based
+     flashing. Any combination of lids is acceptable.
+
+Common Options:
+
+  -p powers off the machine if it is running. Without -p, a running machine
+     will cause the script to error out.
+
+  -v makes the script print some progress messages. Recommended.
+
+  -d makes the script print lots of things (set -vx).
+     Only use this for debugging the script: it's highly likely that
+     successful booting into Petitboot will not be detected with this option.
+EOF
+	exit 1;
+}
+
+## 'Main' script begins
+
+# Check prereqs
+for func in $FUNCTIONS_NEEDED ; do
+	if ! command -v "$func" &> /dev/null ; then
+		error "I require command $func but it is not in \$PATH ($PATH)";
+	fi
+done
+
+# Parse options
+V=0;
+bootonly=0;
+powerdown=0;
+firmware_supplied=0;
+target=""
+method=""
+PNOR=""
+LID[0]=""
+LID[1]=""
+LID[2]=""
+while getopts "hvdpb:1:2:3:P:t:" OPT; do
+	case "$OPT" in
+		v)
+			V=1;
+			;;
+		h)
+			usage;
+			;;
+		d)
+			set -vx;
+			;;
+		b)
+			method=$OPTARG;
+			bootonly=1;
+			if [ "$firmware_supplied" -eq 1 ]; then
+				usage
+			fi
+			;;
+		p)
+			powerdown=1;
+			;;
+		1|2|3)
+			firmware_supplied=1;
+			if [ "$bootonly" -eq 1 ] || [ "$method" = "bmc" ]; then
+				usage;
+			fi
+			if [ ! -e "$OPTARG" ] ; then
+				error "Couldn't stat $OPTARG";
+			fi
+			LID[$((OPT - 1))]="$OPTARG"
+			method=fsp
+			;;
+		P)
+			firmware_supplied=1;
+			if [ "$bootonly" -eq 1 ] || [ "$method" != "" ]; then
+				usage;
+			fi
+			if [ ! -e "$OPTARG" ] ; then
+				error "Couldn't stat $OPTARG";
+			fi
+			PNOR="$OPTARG"
+			method=bmc
+			;;
+		t)
+			target=$OPTARG;
+			;;
+		\?)
+			usage;
+			;;
+	esac
+done
+
+shift $((OPTIND - 1));
+
+# Require a target and a machine type (method is set by -b, -P or -1/-2/-3)
+if [ "$target" = "" ] || [ "$method" = "" ]; then
+	usage;
+fi
+
+if ! ping -c 1 "$target" &> /dev/null ; then
+	error "Couldn't ping $target";
+fi
+
+if [ "$#" -ne 0 ]; then
+	usage
+fi
+
+
+# pull in the relevant config file and set things up
+source "$(dirname "$0")/${method}_support.sh"
+IPMI_COMMAND="ipmitool -I lanplus -H $target $IPMI_AUTH"
+
+msg "Running sanity test"
+sanity_test
+msg "Passed."
+
+# check the target is down
+# (pulls in is_off from ${method}_support.sh)
+if ! is_off; then
+	if [ "$powerdown" -eq 1 ]; then
+		poweroff
+	else
+		error "$target is not turned off";
+	fi
+fi
+
+# run the boot test
+echo "Boot testing $target";
+begin_t=$(date +%s);
+boot_test
+
+echo "Done in $(( $(date +%s) - begin_t )) seconds";
diff --git a/external/boot-tests/fsp_support.sh b/external/boot-tests/fsp_support.sh
new file mode 100644
index 0000000..3feecd1
--- /dev/null
+++ b/external/boot-tests/fsp_support.sh
@@ -0,0 +1,161 @@
+# Maximum number of BOOT_SLEEP_PERIOD-long polls while waiting for a state change
+BOOT_TIMEOUT="10";
+
+#Username/password for ssh to FSP machines
+SSHUSER=${SSHUSER:-}
+export SSHPASS=${SSHPASS:-}	# exported because sshpass -e reads it from the environment
+
+if [ -z "$SSHUSER" ] || [ -z "$SSHPASS" ] ; then
+	echo "Set SSHUSER and SSHPASS in ENV or ~/.skiboot_boot_tests" >&2	# not msg: must show without -v
+	exit 1;
+fi
+
+#IPMI
+IPMI_AUTH="-P ${IPMI_PASS:-foo}";
+
+# Strip control characters from IPMI before grepping?
+STRIP_CONTROL=1
+
+# How do we SSH in, cp files across?
+SSHCMD="sshpass -e ssh -l $SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $target";
+REMOTECPCMD="sshpass -e scp -o User=$SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ";
+
+GET_PROFILE='. /etc/profile; test -e /home/dev/.profile && . /home/dev/.profile';
+
+function is_off {
+	state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+	[ "$state" = "standby" ]	# status of the test is the return value
+}
+
+function poweroff {
+	i=0;
+	state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+	if [ "$state" = "standby" ]; then
+		# already off
+		return 0
+	fi
+	$SSHCMD "$GET_PROFILE; panlexec -f 8";
+	msg "Waiting 30 seconds..."
+	sleep 30
+	state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+	while [ "$state" != "standby" ] && [ "$i" -lt "$BOOT_TIMEOUT" ] ; do
+		msg "Waiting $BOOT_SLEEP_PERIOD more seconds..."
+		sleep "$BOOT_SLEEP_PERIOD";
+		i=$((i + 1));
+		state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+	done;
+	# sleep a little bit longer --- p81 was getting a bit confused.
+	sleep 10
+	msg "Finishing with state '$state'."
+}
+
+function flash {
+	#Make a backup of the current lids
+	$REMOTECPCMD "$target:/opt/extucode/80f00100.lid" 80f00100.lid.bak &&
+		$REMOTECPCMD "$target:/opt/extucode/80f00101.lid" 80f00101.lid.bak &&
+		$REMOTECPCMD "$target:/opt/extucode/80f00102.lid" 80f00102.lid.bak;
+	if [ $? -ne 0 ] ; then
+		error "Couldn't make backup of currently installed lids";
+	fi
+
+	if [ "${LID[0]}" != "" ]; then
+		$REMOTECPCMD "${LID[0]}" "$target:/opt/extucode/80f00100.lid" ||
+			error "Error copying lid ${LID[0]}";
+		sum=$(md5sum "${LID[0]}" | cut -f 1 -d ' ');
+		$SSHCMD "$GET_PROFILE;
+			sumr=\$(md5sum /opt/extucode/80f00100.lid | cut -f 1 -d ' ');
+			if [ \"$sum\" != \"\$sumr\" ] ; then
+				exit 1;
+			fi;" || error "MD5sum doesn't match for ${LID[0]}"
+
+	fi
+
+	if [ "${LID[1]}" != "" ]; then
+		$REMOTECPCMD "${LID[1]}" "$target:/opt/extucode/80f00101.lid" ||
+			error "Error copying lid ${LID[1]}";
+		sum=$(md5sum "${LID[1]}" | cut -f 1 -d ' ');
+		$SSHCMD "$GET_PROFILE;
+			sumr=\$(md5sum /opt/extucode/80f00101.lid | cut -f 1 -d ' ');
+			if [ \"$sum\" != \"\$sumr\" ] ; then
+				exit 1;
+			fi;" || error "MD5sum doesn't match for ${LID[1]}"
+	fi
+
+	if [ "${LID[2]}" != "" ]; then
+		$REMOTECPCMD "${LID[2]}" "$target:/opt/extucode/80f00102.lid" ||
+			error "Error copying lid ${LID[2]}";
+		sum=$(md5sum "${LID[2]}" | cut -f 1 -d ' ');
+		$SSHCMD "$GET_PROFILE;
+			sumr=\$(md5sum /opt/extucode/80f00102.lid | cut -f 1 -d ' ');
+			if [ \"$sum\" != \"\$sumr\" ] ; then
+				exit 1;
+			fi;" || error "MD5sum doesn't match for ${LID[2]}"
+	fi
+
+
+	$SSHCMD "$GET_PROFILE;
+		if [ \"\$(smgr mfgState)\" != 'standby' ] ; then
+			exit 1;
+		fi
+		cupdmfg -opt | grep '80f0010'";
+	if [ $? -ne 0 ] ; then
+		error "Could not install lids on the FSP";
+	fi
+
+	sleep 2; #Don't rush the fsp
+}
+
+function boot_firmware {
+	ISTEP_LOG=$(mktemp --tmpdir builder-1.XXXXXX);
+	$SSHCMD "$GET_PROFILE; istep" &> "$ISTEP_LOG" &
+	msg "Waiting 90 seconds for $target to boot";
+	sleep 90;
+	i=0;
+	state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+	while [ "$state" != "runtime" ] && [ "$i" -lt "$BOOT_TIMEOUT" ] ; do
+		msg "Waiting $BOOT_SLEEP_PERIOD more seconds";
+		sleep "$BOOT_SLEEP_PERIOD";
+		i=$((i + 1));
+		state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+	done;
+
+	if [ "$state" != "runtime" ] ; then	# reaching runtime on the very last poll is a success
+		state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+		case "$state" in
+			"ipling")
+				echo "$target still hasn't come up but firmware hasn't specifically crashed";
+				;;
+			"dumping")
+				echo "$target has crashed";
+				;;
+			"runtime")
+				echo "Oops, looks like system has managed to come up...";
+				;;
+			"standby")
+				echo "System is powered off? How can this be?";
+				;;
+			*)
+				echo "$target is an unknown state '$state'";
+				;;
+		esac
+		echo "istep log";
+		cat "$ISTEP_LOG";
+		rm -rf "$ISTEP_LOG"
+		error "Boot test on $target failed";
+	fi
+	rm -rf "$ISTEP_LOG";
+}
+
+function machine_sanity_test {
+	$SSHCMD "$GET_PROFILE; test -d /nfs/bin"
+	if [ $? -ne 0 ]; then
+		echo "Failed to read /nfs/bin"
+		error "Is /nfs mounted on the FSP?"
+	fi
+
+	$SSHCMD "$GET_PROFILE; which md5sum > /dev/null && which cupdmfg > /dev/null"
+	if [ $? -ne 0 ]; then
+		echo "Missing md5sum or cupdmfg on the FSP?"
+		error "Is /nfs mounted on the FSP?"
+	fi
+}