aboutsummaryrefslogtreecommitdiff
path: root/hw/slw.c
diff options
context:
space:
mode:
author: Gautham R. Shenoy <ego@linux.vnet.ibm.com> 2017-08-23 21:01:40 +0530
committer: Stewart Smith <stewart@linux.vnet.ibm.com> 2017-08-24 18:05:10 +1000
commit: 51974cab7c89504a4ceb6706437665d5e8d2a533 (patch)
tree: 5bde244249e8222a52ef8699b0ecf28568782b50 /hw/slw.c
parent: 01af87cd8953de0a0c9056dc45d0c11232ba5849 (diff)
download: skiboot-51974cab7c89504a4ceb6706437665d5e8d2a533.zip
skiboot-51974cab7c89504a4ceb6706437665d5e8d2a533.tar.gz
skiboot-51974cab7c89504a4ceb6706437665d5e8d2a533.tar.bz2
slw: Modify the power9 stop0_lite latency & residency
Currently skiboot exposes the exit-latency for stop0_lite as 200ns and the target-residency to be 2us. However, the kernel cpu-idle infrastructure truncates the latency to whole microseconds and lists the stop0_lite latency as 0us, putting it on par with snooze state. As a result, when the predicted latency is small (< 1us), cpuidle will select stop0_lite instead of snooze. The difference between these states is that snooze doesn't require an interrupt to exit from the state, but stop0_lite does. And the value 200ns doesn't include the interrupt latency. This shows up in the context_switch2 benchmark (http://ozlabs.org/~anton/junkcode/context_switch2.c) where the number of context switches per second with the stop0_lite disabled is found to be roughly 30% more than with stop0_lite enabled. =============================================================================== x latency_200ns_residency_2us + latency_200ns_residency_2us_stop0_lite_disabled N Min Max Median Avg Stddev x 100 222784 473466 294510 302295.26 45380.6 + 100 205316 609420 385198 396338.72 78135.648 Difference at 99.0% confidence 94043.5 +/- 23276.2 31.1098% +/- 7.69983% (Student's t, pooled s = 63892.8) =============================================================================== This can be correlated with the number of times cpuidle enters stop0_lite compared to snooze. =================================================================== latency=200ns, residency=2us stop0_lite enabled. * snooze usage = 7 * stop0 lite usage = 3200324 * stop1 lite usage = 6 stop0_lite disabled * snooze usage: 287846 * stop0_lite usage: 0 * stop1_lite usage: 0 ================================================================== Hence, bump up the exit latency of stop0_lite to 1us. Since the target residency is chosen to be 10 times the exit latency, set the target residency to 10us.
With these values, we see a roughly 69% improvement in the number of context switches: ===================================================================== x latency_200ns_residency_2us + latency_1us_residency_10us N Min Max Median Avg Stddev x 100 222784 473466 294510 302295.26 45380.6 + 100 281790 710784 514878 510224.62 85163.252 Difference at 99.0% confidence 207929 +/- 24858.3 68.7835% +/- 8.22319% (Student's t, pooled s = 68235.5) ===================================================================== The cpuidle usage statistics show that we choose stop0_lite less often in such cases. latency = 1us, residency = 10us stop0_lite enabled * snooze usage = 536808 * stop0 lite usage = 3 * stop1 lite usage = 7 Reported-by: Anton Blanchard <anton@samba.org> Signed-off-by: Gautham R. Shenoy <ego@linux.vnet.ibm.com> Signed-off-by: Vaidyanathan Srinivasan <svaidy@linux.vnet.ibm.com> Signed-off-by: Stewart Smith <stewart@linux.vnet.ibm.com>
Diffstat (limited to 'hw/slw.c')
-rw-r--r-- hw/slw.c 12
1 files changed, 6 insertions, 6 deletions
diff --git a/hw/slw.c b/hw/slw.c
index c0ab9de..98040e6 100644
--- a/hw/slw.c
+++ b/hw/slw.c
@@ -508,8 +508,8 @@ static struct cpu_idle_states power8_cpu_idle_states[] = {
static struct cpu_idle_states power9_cpu_idle_states[] = {
{
.name = "stop0_lite", /* Enter stop0 with no state loss */
- .latency_ns = 200,
- .residency_ns = 2000,
+ .latency_ns = 1000,
+ .residency_ns = 10000,
.flags = 0*OPAL_PM_DEC_STOP \
| 0*OPAL_PM_TIMEBASE_STOP \
| 0*OPAL_PM_LOSE_USER_CONTEXT \
@@ -522,8 +522,8 @@ static struct cpu_idle_states power9_cpu_idle_states[] = {
.pm_ctrl_reg_mask = OPAL_PM_PSSCR_MASK },
{
.name = "stop0",
- .latency_ns = 300,
- .residency_ns = 3000,
+ .latency_ns = 2000,
+ .residency_ns = 20000,
.flags = 0*OPAL_PM_DEC_STOP \
| 0*OPAL_PM_TIMEBASE_STOP \
| 0*OPAL_PM_LOSE_USER_CONTEXT \
@@ -653,8 +653,8 @@ static struct cpu_idle_states power9_cpu_idle_states[] = {
static struct cpu_idle_states power9_ndd1_cpu_idle_states[] = {
{
.name = "stop0_lite",
- .latency_ns = 200,
- .residency_ns = 2000,
+ .latency_ns = 1000,
+ .residency_ns = 10000,
.flags = 0*OPAL_PM_DEC_STOP \
| 0*OPAL_PM_TIMEBASE_STOP \
| 0*OPAL_PM_LOSE_USER_CONTEXT \