aboutsummaryrefslogtreecommitdiff
path: root/external/boot-tests
diff options
context:
space:
mode:
authorStewart Smith <stewart@linux.vnet.ibm.com>2015-04-29 13:57:23 +1000
committerStewart Smith <stewart@linux.vnet.ibm.com>2015-04-29 13:57:23 +1000
commitca755775fe760f8f2e3673b7f0f3140cc816e8cd (patch)
tree38d3d084c66c8bf8a82829020d6508acb4e277cf /external/boot-tests
parent437b887183528a23399cc153bac0728cb635a298 (diff)
downloadskiboot-ca755775fe760f8f2e3673b7f0f3140cc816e8cd.zip
skiboot-ca755775fe760f8f2e3673b7f0f3140cc816e8cd.tar.gz
skiboot-ca755775fe760f8f2e3673b7f0f3140cc816e8cd.tar.bz2
Add boot-tests scripts for automating boot testing on FSP and BMC systems
Have been using this for a while in the lab. It's a good start for more fully automated and autonomous boot testing. From: Daniel Axtens <daxtens@au1.ibm.com> From: Cyril Bur <cyril.bur@au1.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'external/boot-tests')
-rw-r--r--external/boot-tests/bmc_support.sh71
-rwxr-xr-xexternal/boot-tests/boot_test.sh259
-rw-r--r--external/boot-tests/fsp_support.sh161
3 files changed, 491 insertions, 0 deletions
diff --git a/external/boot-tests/bmc_support.sh b/external/boot-tests/bmc_support.sh
new file mode 100644
index 0000000..03357ed
--- /dev/null
+++ b/external/boot-tests/bmc_support.sh
@@ -0,0 +1,71 @@
+# Settings for BMC-based machines.
+# boot_test.sh sources this file only after option parsing, so $target is
+# already set when the command strings below are expanded.
+
+#Number of times to sleep
+# (boot_firmware polls the chassis power state at most this many times,
+# sleeping BOOT_SLEEP_PERIOD seconds between polls)
+BOOT_TIMEOUT="5";
+
+#Path to memboot binary
+#MEMBOOT=${MEMBOOT:-memboot};
+
+#Username/password for ssh to BMC machines
+# (either may be overridden from the environment; SSHPASS is exported so that
+# the "sshpass -e" invocations below can pick it up from the environment)
+SSHUSER=${SSHUSER:-sysadmin};
+export SSHPASS=${SSHPASS:-superuser};
+
+#Username/password for IPMI
+# (IPMI_USER / IPMI_PASS override the defaults; boot_test.sh appends this
+# string to its ipmitool command line)
+IPMI_AUTH="-U ${IPMI_USER:-admin} -P ${IPMI_PASS:-admin}"
+
+# Strip control characters from IPMI before grepping?
+# (read by linux_boot in boot_test.sh)
+STRIP_CONTROL=0
+
+# How do we SSH/SCP in?
+# SSHCMD is a complete ssh command line for $target; callers append the remote
+# command. Host key checking is disabled and known hosts are not recorded.
+SSHCMD="sshpass -e ssh -l $SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $target";
+# REMOTECPCMD copies files with rsync over the same kind of ssh transport.
+# The leading "eval" makes the shell re-parse the expanded string so that the
+# escaped quotes group the whole sshpass/ssh command into rsync's -e argument.
+REMOTECPCMD="eval rsync -e \"sshpass -e ssh -l $SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \" ";
+
+# Report whether the machine is powered down.
+# Globals: IPMI_COMMAND (read).
+# Returns: 0 only when ipmitool answers exactly "Chassis Power is off";
+#          any other answer (including a failed ipmitool call) is non-zero.
+function is_off {
+    # The status of the test is the function's status; no need to smuggle it
+    # through "return $(...)".
+    [ "$($IPMI_COMMAND chassis power status)" = "Chassis Power is off" ]
+}
+
+# Ask the BMC to switch chassis power off, then pause for a fixed grace
+# period. The resulting power state is not re-checked here.
+# Globals: IPMI_COMMAND (read).
+poweroff() {
+    $IPMI_COMMAND chassis power off
+    # give it some time
+    sleep 10
+}
+
+# Copy the PNOR image to the BMC and write it to flash with pflash.
+# Globals: REMOTECPCMD, PNOR, target, SSHCMD (read).
+# Exits the script through error() if the copy or the flash step fails.
+function flash {
+    # REMOTECPCMD begins with "eval", so this line is re-parsed by the shell;
+    # the arguments are deliberately left as plain words.
+    if ! $REMOTECPCMD $PNOR $target:/tmp/image.pnor; then
+        error "Couldn't copy firmware image";
+    fi
+
+    # Habanero doesn't have md5sum
+    #flash_md5=$(md5sum "$1" | cut -f 1 -d ' ');
+    #$SSHCMD "flash_md5r=\$(md5sum /tmp/image.pnor | cut -f 1 -d ' ');
+    # if [ \"$flash_md5\" != \"\$flash_md5r\" ] ; then
+    # exit 1;
+    # fi";
+    #if [ "$?" -ne "0" ] ; then
+    # error "Firmware MD5s don't match";
+    #fi
+
+    # flash it
+    msg "Flashing PNOR"
+    if ! $SSHCMD "/usr/local/bin/pflash -E -f -p /tmp/image.pnor"; then
+        error "An unexpected pflash error has occurred";
+    fi
+}
+
+# Power the chassis on over IPMI and wait until it reports being on.
+# Globals: IPMI_COMMAND, BOOT_TIMEOUT, BOOT_SLEEP_PERIOD, target (read).
+# Polls at most BOOT_TIMEOUT times, BOOT_SLEEP_PERIOD seconds apart, and exits
+# the script through error() if the chassis is still off afterwards.
+function boot_firmware {
+    local i=0
+    local power_state
+
+    $IPMI_COMMAND chassis power on > /dev/null;
+    power_state=$($IPMI_COMMAND chassis power status)
+    while [ "$power_state" = "Chassis Power is off" ] && [ "$i" -lt "$BOOT_TIMEOUT" ] ; do
+        msg -n ".";
+        sleep "$BOOT_SLEEP_PERIOD";
+        i=$((i + 1));
+        power_state=$($IPMI_COMMAND chassis power status)
+    done
+    # Decide on the last observed power state rather than on the counter, so
+    # a machine that came on during the final sleep is not reported as failed.
+    if [ "$power_state" = "Chassis Power is off" ] ; then
+        error "Couldn't power on $target";
+    fi
+}
+
+# Machine-type specific sanity hook called by sanity_test.
+# BMC machines have no extra checks, so this always succeeds.
+machine_sanity_test() {
+    return 0
+}
diff --git a/external/boot-tests/boot_test.sh b/external/boot-tests/boot_test.sh
new file mode 100755
index 0000000..09d0c27
--- /dev/null
+++ b/external/boot-tests/boot_test.sh
@@ -0,0 +1,259 @@
+#!/bin/bash
+# Let's try for /bin/sh but bashisms will sneak in.
+
+# partial bash strict mode
+# (-u: expanding an unset variable is an error; pipefail: a pipeline fails
+# when any of its stages fails. -e is not enabled, so exit codes are checked
+# explicitly throughout the script.)
+set -uo pipefail
+
+# Verbosity flag read by msg: a non-zero value enables progress messages.
+V=0;
+
+# Optional per-user settings file; sourced into this shell when it exists.
+if [ -f ~/.skiboot_boot_tests ]; then
+ source ~/.skiboot_boot_tests
+fi
+
+# Utility functions
+# Fatal-error helper.
+#   $1 - message to print
+# Removes SSHPASS from the environment, writes the message to stderr and
+# terminates the script with exit status 1.
+error() {
+    unset SSHPASS
+    echo "$1" >&2
+    exit 1
+}
+
+# Print a progress message, but only in verbose mode.
+#   $@ - passed straight to echo, so echo options such as -n are honoured
+# Globals: V (read) - messages are printed only when it is non-zero.
+function msg {
+    if [ "$V" -ne 0 ] ; then
+        # Quoted so that messages are not word-split or glob-expanded.
+        echo "$@";
+    fi
+}
+
+# Generic conf
+# Seconds slept between polls in the *_support.sh wait loops.
+BOOT_SLEEP_PERIOD=10
+# External commands that must be found in $PATH; checked at start-up below.
+FUNCTIONS_NEEDED="sshpass ssh ipmitool md5sum rsync expect";
+
+# Wait for the target to reach Petitboot by watching its IPMI serial-over-LAN
+# console through an expect script (300 second timeout).
+# Globals: STRIP_CONTROL, IPMI_COMMAND (read);
+#          STRIPCOMMAND, ipmiresult, retval, LINUXBOOT_LOG (written).
+# Returns: 0 once "Welcome to Petitboot" is seen; 1 if expect fails (timeout,
+#          unexpected EOF, ...), after dumping the captured console log.
+# Exits the script through error() if the initial SOL deactivate fails.
+function linux_boot {
+    # NOTE(review): STRIPCOMMAND is assigned here but nothing visible in this
+    # function consumes it - confirm whether the console log was meant to be
+    # filtered through it.
+    if [ "$STRIP_CONTROL" -eq 1 ]; then
+        STRIPCOMMAND="col -b -l 1"
+    else
+        STRIPCOMMAND="cat"
+    fi
+
+    #Everyone is going to forget to disconnect - force them off
+    ipmiresult=$($IPMI_COMMAND sol deactivate 2>&1);
+    retval=$?
+    if [ "$retval" -ne 0 ] && [ "$ipmiresult" != "Info: SOL payload already de-activated" ]; then
+        error "IPMI failed; it has probably stalled on the FSP."
+    fi
+
+    LINUXBOOT_LOG=$(mktemp --tmpdir builder-2.XXXXXX) ||
+        error "Couldn't create a temporary file for the boot log"
+    # The heredoc is unquoted on purpose: $IPMI_COMMAND is expanded by the
+    # shell, while the \n sequences reach expect untouched.
+    cat <<EOF | expect > "$LINUXBOOT_LOG"
+set timeout 300
+spawn $IPMI_COMMAND sol activate
+expect {
+timeout { send_user "\nTimeout waiting for petitboot\n"; exit 1 }
+eof { send_user "\nUnexpected EOF\n"; exit 1 }
+"Welcome to Petitboot"
+}
+
+close
+exit 0
+EOF
+    # With pipefail this is non-zero when expect (or cat) failed.
+    retval=$?
+    $IPMI_COMMAND sol deactivate > /dev/null;
+    if [ "$retval" -ne 0 ]; then
+        msg "Waiting for linux has timed out"
+        msg "Boot log follows:"
+        cat "$LINUXBOOT_LOG"
+        rm -f "$LINUXBOOT_LOG"
+        return 1
+    fi
+    rm -f "$LINUXBOOT_LOG"
+    return 0
+}
+
+# Run one complete boot test: optionally flash, boot the firmware, then wait
+# for Petitboot.
+#   $@ - forwarded unchanged to flash
+# Globals: bootonly, target (read); SSHPASS (unset on success).
+# Exits the script through error() if Petitboot is not reached.
+function boot_test {
+    # The functions called (e.g. flash, boot) are from the *_support files
+    if [ "$bootonly" -ne 1 ]; then
+        msg "Flashing ${target}..."
+        # Quoted so arguments survive word splitting intact.
+        flash "$@";
+    fi
+
+    msg "Booting $target..."
+    boot_firmware;
+    msg "firmware looks good, waiting for linux";
+
+    if ! linux_boot; then
+        error "Couldn't reach petitboot on $target";
+    fi
+    msg "$target has booted";
+    unset SSHPASS;
+}
+
+# Check that the target can be reached over both SSH and IPMI before any
+# testing starts, then run the machine-type specific checks.
+# Globals: SSHCMD, IPMI_COMMAND, target (read).
+# Exits the script through error() when either connection fails.
+sanity_test() {
+    if ! $SSHCMD true; then
+        echo "Failed to SSH to $target..."
+        echo "Command was: $SSHCMD true"
+        error "Try connecting manually to diagnose the issue."
+    fi
+
+    if ! $IPMI_COMMAND chassis power status > /dev/null; then
+        echo "Failed to connect to $target with IPMI..."
+        echo "Command was: $IPMI_COMMAND chassis power status"
+        error "Try connecting manually to diagnose the issue."
+    fi
+
+    # machine_sanity_test comes from the sourced *_support.sh file
+    machine_sanity_test
+}
+
+# Print the help text on stdout and terminate the script with status 1.
+# The heredoc delimiter is quoted: the text contains nothing to expand.
+usage() {
+    cat <<'EOF'
+boot_test.sh tests the bootability of a given target, optionally after
+ flashing new firmware onto the target.
+
+There are three usage modes.
+
+1) boot_test.sh -h
+ Print this help
+
+2) boot_test.sh [-vdp] -t target -b (fsp|bmc)
+ Boot test the target without flashing. Specify the type of machine
+ (FSP or BMC) with the -b option.
+
+3) boot_test.sh [-vdp] -t target -P pnor
+ boot_test.sh [-vdp] -t target [-1 lid1] [-2 lid2] [-3 lid3]
+
+ Flash the given firmware before boot testing.
+
+ If -P is given, the file is assumed to be a PNOR for BMC based
+ flashing.
+
+ If -1/-2/-3 are given, the files are assumed to be lids for FSP based
+ flashing. Any combination of lids is acceptable.
+
+Common Options:
+
+ -p powers off the machine if it is running. Without -p, a running machine
+ will cause the script to error out.
+
+ -v makes the script print some progress messages. Recommended.
+
+ -d makes the script print lots of things (set -vx).
+ Only use this for debugging the script: it's highly likely that
+ successful booting into Petitboot will not be detected with this option.
+EOF
+    exit 1
+}
+
+## 'Main' script begins
+
+# Check prereqs: every command named in FUNCTIONS_NEEDED must be resolvable,
+# otherwise bail out through error() before touching the target.
+for func in $FUNCTIONS_NEEDED; do
+    command -v "$func" > /dev/null 2>&1 ||
+        error "I require command $func but it is not in \$PATH ($PATH)"
+done
+
+# Parse options
+# Defaults first; V is reset here even if the sourced rc file changed it.
+V=0
+bootonly=0
+powerdown=0
+firmware_supplied=0
+target=""
+method=""
+PNOR=""
+LID[0]=""
+LID[1]=""
+LID[2]=""
+while getopts "hvdpb:1:2:3:P:t:" OPT; do
+    case "$OPT" in
+        h|\?)
+            # explicit help request or an option getopts rejected
+            usage
+            ;;
+        v)
+            V=1
+            ;;
+        d)
+            set -vx
+            ;;
+        p)
+            powerdown=1
+            ;;
+        t)
+            target=$OPTARG
+            ;;
+        b)
+            # boot-only mode; not allowed once a firmware option was seen
+            method=$OPTARG
+            bootonly=1
+            if [ "$firmware_supplied" -eq 1 ]; then
+                usage
+            fi
+            ;;
+        1|2|3)
+            # FSP lid for slot 1-3; not allowed after -b or alongside a PNOR
+            firmware_supplied=1
+            if [ "$bootonly" -eq 1 ] || [ "$method" = "bmc" ]; then
+                usage
+            fi
+            [ -e "$OPTARG" ] || error "Couldn't stat $OPTARG"
+            # option -1/-2/-3 maps to LID index 0/1/2
+            LID[$((OPT - 1))]="$OPTARG"
+            method=fsp
+            ;;
+        P)
+            # BMC PNOR image; not allowed once any machine type is chosen
+            firmware_supplied=1
+            if [ "$bootonly" -eq 1 ] || [ -n "$method" ]; then
+                usage
+            fi
+            [ -e "$OPTARG" ] || error "Couldn't stat $OPTARG"
+            PNOR="$OPTARG"
+            method=bmc
+            ;;
+    esac
+done
+
+# drop the parsed options, leaving only positional arguments
+shift $((OPTIND - 1))
+
+# Pull out the target and test
+if [ -z "$target" ]; then
+    usage;
+fi
+
+# No positional arguments are accepted after the options.
+if [ "$#" -ne 0 ]; then
+    usage
+fi
+
+# The machine type selects which *_support.sh file is loaded below. Reject a
+# missing or unknown type here instead of failing later on a missing file and
+# an unbound variable.
+case "$method" in
+    fsp|bmc)
+        ;;
+    *)
+        echo "Unknown or missing machine type '$method' (expected fsp or bmc)" >&2
+        usage
+        ;;
+esac
+
+if ! ping -c 1 "$target" &> /dev/null ; then
+    error "Couldn't ping $target";
+fi
+
+# pull in the relevant config file and set things up
+# (the support file reads $target, so it must be sourced after option parsing)
+source "$(dirname "$0")/${method}_support.sh" ||
+    error "Couldn't load $(dirname "$0")/${method}_support.sh"
+IPMI_COMMAND="ipmitool -I lanplus -H $target $IPMI_AUTH"
+
+msg "Running sanity test"
+sanity_test
+msg "Passed."
+
+# The target has to be powered down before the test starts.
+# (is_off and poweroff come from ${method}_support.sh)
+if is_off; then
+    : # already down, nothing to do
+elif [ "$powerdown" -eq 1 ]; then
+    poweroff
+else
+    error "$target is not turned off"
+fi
+
+# run the boot test and report the elapsed wall-clock time
+echo "Boot testing $target"
+begin_t=$(date +%s)
+boot_test
+
+echo "Done in $(( $(date +%s) - begin_t )) seconds"
diff --git a/external/boot-tests/fsp_support.sh b/external/boot-tests/fsp_support.sh
new file mode 100644
index 0000000..3feecd1
--- /dev/null
+++ b/external/boot-tests/fsp_support.sh
@@ -0,0 +1,161 @@
+# Settings for FSP-based machines.
+# boot_test.sh sources this file only after option parsing, so $target is
+# already set when the command strings below are expanded.
+
+#Number of times to sleep
+# (upper bound on polls in the poweroff / boot_firmware wait loops)
+BOOT_TIMEOUT="10";
+
+#Username/password for ssh to FSP machines
+# There are no defaults: both must come from the environment or from
+# ~/.skiboot_boot_tests. SSHPASS is exported because "sshpass -e" below reads
+# the password from the environment.
+SSHUSER=${SSHUSER:-}
+export SSHPASS=${SSHPASS:-}
+
+if [ -z "$SSHUSER" ] || [ -z "$SSHPASS" ] ; then
+    # error() rather than msg(): this must be visible without -v.
+    error "Set SSHUSER and SSHPASS in ENV or ~/.skiboot_boot_tests"
+fi
+
+#IPMI
+# (password only; IPMI_PASS overrides the default)
+IPMI_AUTH="-P ${IPMI_PASS:-foo}";
+
+# Strip control characters from IPMI before grepping?
+# (read by linux_boot in boot_test.sh)
+STRIP_CONTROL=1
+
+# How do we SSH in, cp files across?
+# SSHCMD is a complete ssh command line for $target; callers append the remote
+# command. REMOTECPCMD is an scp prefix; callers append source and destination.
+SSHCMD="sshpass -e ssh -l $SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no $target";
+REMOTECPCMD="sshpass -e scp -o User=$SSHUSER -o LogLevel=quiet -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ";
+
+# Prefix for every remote command: load the profile files on the FSP first.
+GET_PROFILE='. /etc/profile; test -e /home/dev/.profile && . /home/dev/.profile';
+
+# Report whether the machine is powered down.
+# Globals: SSHCMD, GET_PROFILE (read).
+# Returns: 0 only when "smgr mfgState" on the FSP prints "standby"; any other
+#          output (including a failed ssh call) is non-zero.
+function is_off {
+    local state
+    state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+    # The status of the test is the function's status; no need to smuggle it
+    # through "return $(...)".
+    [ "$state" = "standby" ]
+}
+
+# Power the machine down through the FSP and wait for it to reach standby.
+# Globals: SSHCMD, GET_PROFILE, BOOT_TIMEOUT, BOOT_SLEEP_PERIOD (read);
+#          i, state (written).
+# Returns 0 immediately if the FSP already reports standby. Otherwise issues
+# "panlexec -f 8", waits 30 seconds, then polls up to BOOT_TIMEOUT times.
+# The final state is only reported through msg, not enforced.
+poweroff() {
+    i=0
+    state=$($SSHCMD "$GET_PROFILE; smgr mfgState")
+    # already off
+    [ "$state" = "standby" ] && return 0
+
+    $SSHCMD "$GET_PROFILE; panlexec -f 8"
+    msg "Waiting 30 seconds..."
+    sleep 30
+    state=$($SSHCMD "$GET_PROFILE; smgr mfgState")
+    while [ "$state" != "standby" ] && [ "$i" -lt "$BOOT_TIMEOUT" ]; do
+        msg "Waiting $BOOT_SLEEP_PERIOD more seconds..."
+        sleep $BOOT_SLEEP_PERIOD
+        i=$((i + 1))
+        state=$($SSHCMD "$GET_PROFILE; smgr mfgState")
+    done
+    # sleep a little bit longer --- p81 was getting a bit confused.
+    sleep 10
+    msg "Finishing with state '$state'."
+}
+
+# Copy one lid to the FSP and verify the copy by comparing MD5 sums.
+#   $1 - local lid file
+#   $2 - file name to install it as under /opt/extucode on the FSP
+# Exits the script through error() on a copy failure or checksum mismatch.
+function _flash_install_lid {
+    local lid=$1
+    local remote=/opt/extucode/$2
+    local sum
+
+    $REMOTECPCMD "$lid" "$target:$remote" ||
+        error "Error copying lid $lid";
+    sum=$(md5sum "$lid" | cut -f 1 -d ' ');
+    # $sum and $remote are expanded locally; \$sumr is computed on the FSP.
+    $SSHCMD "$GET_PROFILE;
+        sumr=\$(md5sum $remote | cut -f 1 -d ' ');
+        if [ \"$sum\" != \"\$sumr\" ] ; then
+            exit 1;
+        fi;" || error "MD5sum doesn't match for $lid"
+}
+
+# Back up the three lids currently on the FSP, copy across whichever of
+# LID[0..2] were supplied, then install them with cupdmfg.
+# Globals: REMOTECPCMD, SSHCMD, GET_PROFILE, target, LID (read).
+# The backups (80f0010N.lid.bak) are written to the current directory.
+# Exits the script through error() if any step fails.
+function flash {
+    local n
+
+    #Make a backup of the current lids
+    for n in 0 1 2; do
+        $REMOTECPCMD "$target:/opt/extucode/80f0010$n.lid" "80f0010$n.lid.bak" ||
+            error "Couldn't make backup of currently installed lids";
+    done
+
+    # LID[n] is installed as 80f0010n.lid; empty slots are skipped
+    for n in 0 1 2; do
+        if [ -n "${LID[$n]:-}" ]; then
+            _flash_install_lid "${LID[$n]}" "80f0010$n.lid"
+        fi
+    done
+
+    # Only install while the FSP reports standby; the grep makes the step
+    # fail unless cupdmfg mentions one of the lids.
+    $SSHCMD "$GET_PROFILE;
+        if [ \$(smgr mfgState) != 'standby' ] ; then
+            exit 1;
+        fi
+        cupdmfg -opt | grep '80f0010'";
+    if [ $? -ne 0 ] ; then
+        error "Could not install lids on the FSP";
+    fi
+
+    sleep 2; #Don't rush the fsp
+}
+
+# Start an IPL with "istep" on the FSP and wait for the machine to reach the
+# "runtime" state.
+# Globals: SSHCMD, GET_PROFILE, BOOT_TIMEOUT, BOOT_SLEEP_PERIOD, target (read);
+#          ISTEP_LOG, i, state (written).
+# Waits 90 seconds, then polls "smgr mfgState" up to BOOT_TIMEOUT times.
+# If the state is still not "runtime", prints a diagnosis and the istep log
+# and exits the script through error().
+function boot_firmware {
+    ISTEP_LOG=$(mktemp --tmpdir builder-1.XXXXXX) ||
+        error "Couldn't create a temporary file for the istep log"
+    # istep runs in the background; its output is only shown on failure
+    $SSHCMD "$GET_PROFILE; istep" &> "$ISTEP_LOG" &
+    msg "Waiting 90 seconds for $target to boot";
+    sleep 90;
+    i=0;
+    state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+    while [ "$state" != "runtime" ] && [ "$i" -lt "$BOOT_TIMEOUT" ] ; do
+        msg "Waiting $BOOT_SLEEP_PERIOD more seconds";
+        sleep "$BOOT_SLEEP_PERIOD";
+        i=$((i + 1));
+        state=$($SSHCMD "$GET_PROFILE; smgr mfgState");
+    done;
+
+    # Decide on the last observed state rather than on the counter, so a
+    # machine that reached runtime on the final poll is not reported as failed.
+    if [ "$state" != "runtime" ] ; then
+        case "$state" in
+            "ipling")
+                echo "$target still hasn't come up but firmware hasn't specifically crashed";
+                ;;
+            "dumping")
+                echo "$target has crashed";
+                ;;
+            "standby")
+                echo "System is powered off? How can this be?";
+                ;;
+            *)
+                echo "$target is in an unknown state '$state'";
+                ;;
+        esac
+        echo "istep log";
+        cat "$ISTEP_LOG";
+        rm -f "$ISTEP_LOG"
+        error "Boot test on $target failed";
+    fi
+    rm -f "$ISTEP_LOG";
+}
+
+# Machine-type specific sanity hook called by sanity_test.
+# Checks on the FSP that /nfs/bin exists and that md5sum and cupdmfg can be
+# found; exits the script through error() otherwise.
+# Globals: SSHCMD, GET_PROFILE (read).
+machine_sanity_test() {
+    if ! $SSHCMD "$GET_PROFILE; test -d /nfs/bin"; then
+        echo "Failed to read /nfs/bin"
+        error "Is /nfs mounted on the FSP?"
+    fi
+
+    if ! $SSHCMD "$GET_PROFILE; which md5sum > /dev/null && which cupdmfg > /dev/null"; then
+        echo "Missing md5sum or cupdmfg on the FSP?"
+        error "Is /nfs mounted on the FSP?"
+    fi
+}