about | summary | refs | log | tree | commit | diff
path: root/hw/occ.c
diff options
context:
space:
mode:
author Stewart Smith <stewart@linux.vnet.ibm.com> 2015-09-25 10:35:30 +1000
committer Stewart Smith <stewart@linux.vnet.ibm.com> 2015-09-25 10:35:30 +1000
commit d856824b73f12c766c5b22fe6ee3c4f05719444b (patch)
tree 17b8c669ef7f9028f8834448b4ae3c63832992a6 /hw/occ.c
parent 6da0c3e9063127bda3a1a660a34b29d3946dd54b (diff)
download skiboot-d856824b73f12c766c5b22fe6ee3c4f05719444b.zip
skiboot-d856824b73f12c766c5b22fe6ee3c4f05719444b.tar.gz
skiboot-d856824b73f12c766c5b22fe6ee3c4f05719444b.tar.bz2
Rate limit OPAL_MSG_OCC to only one outstanding message to host
In the event of a lot of OCC events (or many CPU cores), we could send many OCC messages to the host. If the host wasn't calling opal_get_msg often enough, skiboot would malloc() additional messages until it ran out of skiboot heap, and things didn't end up being much fun. Certain hardware exercisers seem to steal all the time Linux would otherwise have to call opal_get_msg, causing these messages to queue up; the result is "opalmsg: No available node in the free list, allocating" warnings followed by tonnes of backtraces of failing memory allocations.
Diffstat (limited to 'hw/occ.c')
-rw-r--r-- hw/occ.c 19
1 files changed, 17 insertions, 2 deletions
diff --git a/hw/occ.c b/hw/occ.c
index 68b7032..79140cc 100644
--- a/hw/occ.c
+++ b/hw/occ.c
@@ -312,6 +312,14 @@ static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *
return true;
}
+static bool occ_opal_msg_outstanding = false;
+static void occ_msg_consumed(void *data __unused)
+{
+ lock(&occ_lock);
+ occ_opal_msg_outstanding = false;
+ unlock(&occ_lock);
+}
+
static void occ_throttle_poll(void *data __unused)
{
struct proc_chip *chip;
@@ -345,6 +353,8 @@ static void occ_throttle_poll(void *data __unused)
occ_reset = false;
}
} else {
+ if (occ_opal_msg_outstanding)
+ goto done;
for_each_chip(chip) {
occ_data = chip_occ_data(chip);
if ((occ_data->valid == 1) &&
@@ -353,13 +363,18 @@ static void occ_throttle_poll(void *data __unused)
occ_msg.type = OCC_THROTTLE;
occ_msg.chip = chip->id;
occ_msg.throttle_status = occ_data->throttle;
- rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
+ rc = _opal_queue_msg(OPAL_MSG_OCC, NULL,
+ occ_msg_consumed,
3, (uint64_t *)&occ_msg);
- if (!rc)
+ if (!rc) {
chip->throttle = occ_data->throttle;
+ occ_opal_msg_outstanding = true;
+ break;
+ }
}
}
}
+done:
unlock(&occ_lock);
}