author     Igor Mammedov <imammedo@redhat.com>    2025-08-14 18:05:59 +0200
committer  Paolo Bonzini <pbonzini@redhat.com>    2025-08-29 12:48:14 +0200
commit     17e645c6f17999cd0306c4d18d6f6cb3db55756d (patch)
tree       0d7fe67705ca85cfc77e7947f809f451e6153e8c
parent     20c2345290f34aac434284cf9a242c7904d39a27 (diff)
kvm: i386: irqchip: take BQL only if there is an interrupt
When kernel-irqchip=split is used, QEMU still hits BQL contention when reading the ACPI PM/HPET timers (despite timer access being lock-less), so Windows with more than 255 CPUs is still not able to boot (since that requires an IOMMU -> split irqchip).

The problematic path is kvm_arch_pre_run(), where the BQL is taken unconditionally when a split irqchip is in use. The BQL protects a few things there:

1. interrupt check and injection
   However, we do not take the BQL when checking for a pending interrupt (even within the same function), so this patch takes the same approach for the cpu->interrupt_request checks and takes the BQL only if there is a job to do.

2. request_interrupt_window access
   CPUState::kvm_run::request_interrupt_window doesn't need the BQL, as it is only accessed by its own vCPU thread.

3. cr8/cpu_get_apic_tpr access
   The same as #2 applies to CPUState::kvm_run::cr8, and the APIC registers are also cached/synced (get/put) within the vCPU thread they belong to.

Taking the BQL only when necessary eliminates the BQL bottleneck on the IO/MMIO-only exit path, improving latency by 80% in an HPET micro benchmark. This lets Windows boot successfully (when hv-time isn't used) with more than 255 vCPUs.

Signed-off-by: Igor Mammedov <imammedo@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Link: https://lore.kernel.org/r/20250814160600.2327672-8-imammedo@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
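Point 1 boils down to a simple pattern that the diff below applies to kvm_arch_pre_run(): check the pending-interrupt flag without the lock, and take the lock only when an interrupt actually has to be injected. The following is a minimal, self-contained sketch of that pattern only, not the QEMU code; pending_hard_irq, exit_req, big_lock() and big_unlock() are hypothetical stand-ins for cpu->interrupt_request & CPU_INTERRUPT_HARD, cpu->exit_request, bql_lock() and bql_unlock().

/* Sketch: take the big lock only if there is a job to do. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int pending_hard_irq;  /* set by another thread when an irq is raised */
static atomic_int exit_req;          /* read/written only by this vCPU thread */

static void big_lock(void)   { puts("lock taken"); }
static void big_unlock(void) { puts("lock released"); }

static void pre_run(void)
{
    /* lock-free check first: the common MMIO/PIO exit path never takes the lock */
    if (atomic_load(&pending_hard_irq)) {
        big_lock();                          /* lock only to inject the interrupt */
        atomic_store(&pending_hard_irq, 0);  /* ... injection would happen here ... */
        big_unlock();
    }

    /* a flag owned by this thread needs an atomic store, not the big lock */
    atomic_store(&exit_req, 1);
}

int main(void)
{
    pre_run();                               /* nothing pending: lock never taken */
    atomic_store(&pending_hard_irq, 1);
    pre_run();                               /* pending irq: lock held only briefly */
    return 0;
}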
-rw-r--r--   target/i386/kvm/kvm.c   12
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index a7b5c8f..306430a 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -5478,9 +5478,6 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
}
}
- if (!kvm_pic_in_kernel()) {
- bql_lock();
- }
/* Force the VCPU out of its inner loop to process any INIT requests
* or (for userspace APIC, but it is cheap to combine the checks here)
@@ -5489,10 +5486,10 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
if (cpu_test_interrupt(cpu, CPU_INTERRUPT_INIT) &&
!(env->hflags & HF_SMM_MASK)) {
- cpu->exit_request = 1;
+ qatomic_set(&cpu->exit_request, 1);
}
if (cpu_test_interrupt(cpu, CPU_INTERRUPT_TPR)) {
- cpu->exit_request = 1;
+ qatomic_set(&cpu->exit_request, 1);
}
}
@@ -5503,6 +5500,8 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
(env->eflags & IF_MASK)) {
int irq;
+ bql_lock();
+
cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
irq = cpu_get_pic_interrupt(env);
if (irq >= 0) {
@@ -5517,6 +5516,7 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
strerror(-ret));
}
}
+ bql_unlock();
}
/* If we have an interrupt but the guest is not ready to receive an
@@ -5531,8 +5531,6 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
DPRINTF("setting tpr\n");
run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state);
-
- bql_unlock();
}
}