diff options
| field | value | date |
|---|---|---|
| author | Ilya Leoshkevich <iii@linux.ibm.com> | 2025-01-24 12:25:48 +0100 |
| committer | Thomas Huth <thuth@redhat.com> | 2025-01-30 10:39:37 +0100 |
| commit | a4cda3f5df2e693893aa076403867fda8dec5584 (patch) | |
| tree | a3c7fa4bc0cfa795d57a51bd4093921acaa7d2bf | |
| parent | b497b0376cb60ae786ec12cffb259641ec1314eb (diff) | |
| download | qemu-a4cda3f5df2e693893aa076403867fda8dec5584.zip qemu-a4cda3f5df2e693893aa076403867fda8dec5584.tar.gz qemu-a4cda3f5df2e693893aa076403867fda8dec5584.tar.bz2 | |
hw/s390x/s390-virtio-ccw: Fix a record/replay deadlock
Booting an s390x VM in record/replay mode hangs due to a deadlock
between rr_cpu_thread_fn() and s390_machine_reset(). The former needs
the record/replay mutex held by the latter, and the latter waits until
the former completes its run_on_cpu() request.
Fix this by temporarily dropping the record/replay mutex, as is done in
pause_all_vcpus().
Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Message-ID: <20250124112625.23050-1-iii@linux.ibm.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
-rw-r--r-- | hw/s390x/s390-virtio-ccw.c | 24 |
1 file changed, 23 insertions, 1 deletion
diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c
index 3af613d..b069303 100644
--- a/hw/s390x/s390-virtio-ccw.c
+++ b/hw/s390x/s390-virtio-ccw.c
@@ -48,6 +48,7 @@
 #include "kvm/kvm_s390x.h"
 #include "hw/virtio/virtio-md-pci.h"
 #include "hw/s390x/virtio-ccw-md.h"
+#include "system/replay.h"
 #include CONFIG_DEVICES
 
 static Error *pv_mig_blocker;
@@ -454,6 +455,18 @@ static void s390_machine_reset(MachineState *machine, ResetType type)
     CPUState *cs, *t;
     S390CPU *cpu;
 
+    /*
+     * Temporarily drop the record/replay mutex to let rr_cpu_thread_fn()
+     * process the run_on_cpu() requests below. This is safe, because at this
+     * point one of the following is true:
+     * - All CPU threads are not running, either because the machine is being
+     *   initialized, or because the guest requested a reset using diag 308.
+     *   There is no risk to desync the record/replay state.
+     * - A snapshot is about to be loaded. The record/replay state consistency
+     *   is not important.
+     */
+    replay_mutex_unlock();
+
     /* get the reset parameters, reset them once done */
     s390_ipl_get_reset_request(&cs, &reset_type);
 
@@ -533,7 +546,7 @@ static void s390_machine_reset(MachineState *machine, ResetType type)
              * went wrong.
              */
             s390_cpu_set_state(S390_CPU_STATE_OPERATING, cpu);
-            return;
+            goto out_lock;
         }
 
         run_on_cpu(cs, s390_do_cpu_load_normal, RUN_ON_CPU_NULL);
@@ -546,6 +559,15 @@ static void s390_machine_reset(MachineState *machine, ResetType type)
         run_on_cpu(t, s390_do_cpu_set_diag318, RUN_ON_CPU_HOST_ULONG(0));
     }
     s390_ipl_clear_reset_request();
+
+out_lock:
+    /*
+     * Re-take the record/replay mutex, temporarily dropping the BQL in order
+     * to satisfy the ordering requirements.
+     */
+    bql_unlock();
+    replay_mutex_lock();
+    bql_lock();
 }
 
 static void s390_machine_device_pre_plug(HotplugHandler *hotplug_dev,