aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristian Borntraeger <borntraeger@de.ibm.com>2017-01-26 20:23:30 +0100
committerCornelia Huck <cornelia.huck@de.ibm.com>2017-02-24 10:15:18 +0100
commit409422cd83f36450a3c0bdd49163786cbff3a7ea (patch)
tree8ace7d46929dc1274060aba84b4b57131a317dd6
parent94b5024b1f466b4b8f03e7b2219f77e9a3a24d51 (diff)
downloadqemu-409422cd83f36450a3c0bdd49163786cbff3a7ea.zip
qemu-409422cd83f36450a3c0bdd49163786cbff3a7ea.tar.gz
qemu-409422cd83f36450a3c0bdd49163786cbff3a7ea.tar.bz2
s390x/kvm: detect some program check loops
Sometimes (e.g. early boot) a guest is broken in such ways that it loops 100% delivering operation exceptions (illegal operation) but the pgm new PSW is not set properly. This will result in code being read from address zero, which usually contains another illegal op. Let's detect this case and put the guest in crashed state. Instead of only detecting this for address zero apply a heuristic that will work for any program check new psw so that it will also reach the crashed state if you provide some random elf file to the -kernel option. We do not want guest problem state to be able to trigger a guest panic, e.g. by faulting on an address that is the same as the program check new PSW, so we check for the problem state bit being off. With this we a: get rid of CPU consumption of such broken guests b: keep the program old PSW. This allows to find out the original illegal operation - making debugging such early boot issues much easier than with single stepping This relies on the kernel using a similar heuristic and passing such operation exceptions to user space. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
-rw-r--r--target/s390x/kvm.c43
1 files changed, 40 insertions, 3 deletions
diff --git a/target/s390x/kvm.c b/target/s390x/kvm.c
index 2536780..5ec050c 100644
--- a/target/s390x/kvm.c
+++ b/target/s390x/kvm.c
@@ -1867,6 +1867,40 @@ static void unmanageable_intercept(S390CPU *cpu, const char *str, int pswoffset)
qemu_system_guest_panicked(NULL);
}
+/* try to detect pgm check loops */
+static int handle_oper_loop(S390CPU *cpu, struct kvm_run *run)
+{
+ CPUState *cs = CPU(cpu);
+ PSW oldpsw, newpsw;
+
+ cpu_synchronize_state(cs);
+ newpsw.mask = ldq_phys(cs->as, cpu->env.psa +
+ offsetof(LowCore, program_new_psw));
+ newpsw.addr = ldq_phys(cs->as, cpu->env.psa +
+ offsetof(LowCore, program_new_psw) + 8);
+ oldpsw.mask = run->psw_mask;
+ oldpsw.addr = run->psw_addr;
+ /*
+ * Avoid endless loops of operation exceptions, if the pgm new
+ * PSW will cause a new operation exception.
+ * The heuristic checks if the pgm new psw is within 6 bytes before
+ * the faulting psw address (with same DAT, AS settings) and the
+ * new psw is not a wait psw and the fault was not triggered by
+ * problem state. In that case go into crashed state.
+ */
+
+ if (oldpsw.addr - newpsw.addr <= 6 &&
+ !(newpsw.mask & PSW_MASK_WAIT) &&
+ !(oldpsw.mask & PSW_MASK_PSTATE) &&
+ (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) &&
+ (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT)) {
+ unmanageable_intercept(cpu, "operation exception loop",
+ offsetof(LowCore, program_new_psw));
+ return EXCP_HALTED;
+ }
+ return 0;
+}
+
static int handle_intercept(S390CPU *cpu)
{
CPUState *cs = CPU(cpu);
@@ -1914,11 +1948,14 @@ static int handle_intercept(S390CPU *cpu)
r = EXCP_HALTED;
break;
case ICPT_OPEREXC:
- /* currently only instr 0x0000 after enabled via capability */
+ /* check for break points */
r = handle_sw_breakpoint(cpu, run);
if (r == -ENOENT) {
- enter_pgmcheck(cpu, PGM_OPERATION);
- r = 0;
+ /* Then check for potential pgm check loops */
+ r = handle_oper_loop(cpu, run);
+ if (r == 0) {
+ enter_pgmcheck(cpu, PGM_OPERATION);
+ }
}
break;
case ICPT_SOFT_INTERCEPT: