diff options
author | Stewart Smith <stewart@linux.vnet.ibm.com> | 2015-09-25 10:35:30 +1000 |
---|---|---|
committer | Stewart Smith <stewart@linux.vnet.ibm.com> | 2015-09-25 10:35:30 +1000 |
commit | d856824b73f12c766c5b22fe6ee3c4f05719444b (patch) | |
tree | 17b8c669ef7f9028f8834448b4ae3c63832992a6 /hw/occ.c | |
parent | 6da0c3e9063127bda3a1a660a34b29d3946dd54b (diff) | |
download | skiboot-d856824b73f12c766c5b22fe6ee3c4f05719444b.zip skiboot-d856824b73f12c766c5b22fe6ee3c4f05719444b.tar.gz skiboot-d856824b73f12c766c5b22fe6ee3c4f05719444b.tar.bz2 |
Rate limit OPAL_MSG_OCC to only one outstanding message to host
In the event of a lot of OCC events (or many CPU cores), we could
send many OCC messages to the host. If the host wasn't calling
opal_get_msg often enough, skiboot would malloc() additional
messages until it ran out of skiboot heap, at which point memory
allocations started to fail.
This shows up when running certain hardware exercisers: they seem to
starve Linux of the time it needs to call opal_get_msg, so the messages
queue up and we get "opalmsg: No available node in the free list, allocating"
warnings followed by tonnes of backtraces from failing memory allocations.
Diffstat (limited to 'hw/occ.c')
-rw-r--r-- | hw/occ.c | 19 |
1 files changed, 17 insertions, 2 deletions
@@ -312,6 +312,14 @@ static bool cpu_pstates_prepare_core(struct proc_chip *chip, struct cpu_thread *
 	return true;
 }
 
+static bool occ_opal_msg_outstanding = false;
+static void occ_msg_consumed(void *data __unused)
+{
+	lock(&occ_lock);
+	occ_opal_msg_outstanding = false;
+	unlock(&occ_lock);
+}
+
 static void occ_throttle_poll(void *data __unused)
 {
 	struct proc_chip *chip;
@@ -345,6 +353,8 @@ static void occ_throttle_poll(void *data __unused)
 				occ_reset = false;
 		}
 	} else {
+		if (occ_opal_msg_outstanding)
+			goto done;
 		for_each_chip(chip) {
 			occ_data = chip_occ_data(chip);
 			if ((occ_data->valid == 1) &&
@@ -353,13 +363,18 @@ static void occ_throttle_poll(void *data __unused)
 				occ_msg.type = OCC_THROTTLE;
 				occ_msg.chip = chip->id;
 				occ_msg.throttle_status = occ_data->throttle;
-				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL, NULL,
+				rc = _opal_queue_msg(OPAL_MSG_OCC, NULL,
+						     occ_msg_consumed,
 						     3, (uint64_t *)&occ_msg);
-				if (!rc)
+				if (!rc) {
 					chip->throttle = occ_data->throttle;
+					occ_opal_msg_outstanding = true;
+					break;
+				}
 			}
 		}
 	}
+done:
 	unlock(&occ_lock);
 }
 