diff options
Diffstat (limited to 'core')
-rw-r--r-- | core/affinity.c | 2 | ||||
-rw-r--r-- | core/chip.c | 40 | ||||
-rw-r--r-- | core/cpu.c | 32 | ||||
-rw-r--r-- | core/direct-controls.c | 363 | ||||
-rw-r--r-- | core/hmi.c | 221 | ||||
-rw-r--r-- | core/init.c | 2 | ||||
-rw-r--r-- | core/mce.c | 129 | ||||
-rw-r--r-- | core/test/run-timer.c | 2 |
8 files changed, 725 insertions, 66 deletions
diff --git a/core/affinity.c b/core/affinity.c index 47ba33c..0209d3c 100644 --- a/core/affinity.c +++ b/core/affinity.c @@ -111,6 +111,8 @@ void add_core_associativity(struct cpu_thread *cpu) core_id = (cpu->pir >> 3) & 0xf; else if (proc_gen == proc_gen_p9) core_id = (cpu->pir >> 2) & 0x1f; + else if (proc_gen == proc_gen_p10) + core_id = (cpu->pir >> 2) & 0x1f; else return; diff --git a/core/chip.c b/core/chip.c index f1269d3..f79e8cd 100644 --- a/core/chip.c +++ b/core/chip.c @@ -13,7 +13,9 @@ enum proc_chip_quirks proc_chip_quirks; uint32_t pir_to_chip_id(uint32_t pir) { - if (proc_gen == proc_gen_p9) + if (proc_gen == proc_gen_p10) + return P10_PIR2GCID(pir); + else if (proc_gen == proc_gen_p9) return P9_PIR2GCID(pir); else if (proc_gen == proc_gen_p8) return P8_PIR2GCID(pir); @@ -23,41 +25,59 @@ uint32_t pir_to_chip_id(uint32_t pir) uint32_t pir_to_core_id(uint32_t pir) { - if (proc_gen == proc_gen_p9) { + if (proc_gen == proc_gen_p10) { + if (this_cpu()->is_fused_core) + return P10_PIRFUSED2NORMALCOREID(pir); + else + return P10_PIR2COREID(pir); + } else if (proc_gen == proc_gen_p9) { if (this_cpu()->is_fused_core) return P9_PIRFUSED2NORMALCOREID(pir); else return P9_PIR2COREID(pir); - } else if (proc_gen == proc_gen_p8) + } else if (proc_gen == proc_gen_p8) { return P8_PIR2COREID(pir); - else + } else { assert(false); + } } uint32_t pir_to_fused_core_id(uint32_t pir) { - if (proc_gen == proc_gen_p9) { + if (proc_gen == proc_gen_p10) { + if (this_cpu()->is_fused_core) + return P10_PIR2FUSEDCOREID(pir); + else + return P10_PIR2COREID(pir); + } else if (proc_gen == proc_gen_p9) { if (this_cpu()->is_fused_core) return P9_PIR2FUSEDCOREID(pir); else return P9_PIR2COREID(pir); - } else if (proc_gen == proc_gen_p8) + } else if (proc_gen == proc_gen_p8) { return P8_PIR2COREID(pir); - else + } else { assert(false); + } } uint32_t pir_to_thread_id(uint32_t pir) { - if (proc_gen == proc_gen_p9) { + if (proc_gen == proc_gen_p10) { + if (this_cpu()->is_fused_core) + return P10_PIRFUSED2NORMALTHREADID(pir); + else + return P10_PIR2THREADID(pir); + } else if (proc_gen == proc_gen_p9) { if (this_cpu()->is_fused_core) return P9_PIRFUSED2NORMALTHREADID(pir); else return P9_PIR2THREADID(pir); - } else if (proc_gen == proc_gen_p8) + } else if (proc_gen == proc_gen_p8) { return P8_PIR2THREADID(pir); - else + } else { assert(false); + } } struct proc_chip *next_chip(struct proc_chip *chip) @@ -100,7 +100,7 @@ static void cpu_wake(struct cpu_thread *cpu) if (proc_gen == proc_gen_p8) { /* Poke IPI */ icp_kick_cpu(cpu); - } else if (proc_gen == proc_gen_p9) { + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { p9_dbell_send(cpu->pir); } } @@ -507,6 +507,9 @@ static void cpu_idle_pm(enum cpu_wake_cause wake_on) case proc_gen_p9: vec = cpu_idle_p9(wake_on); break; + case proc_gen_p10: + vec = cpu_idle_p9(wake_on); + break; default: vec = 0; prlog_once(PR_DEBUG, "cpu_idle_pm called with bad processor type\n"); @@ -605,7 +608,7 @@ static void cpu_pm_disable(void) cpu_relax(); } } - } else if (proc_gen == proc_gen_p9) { + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { for_each_available_cpu(cpu) { if (cpu->in_sleep || cpu->in_idle) p9_dbell_send(cpu->pir); @@ -648,7 +651,7 @@ void cpu_set_sreset_enable(bool enabled) pm_enabled = true; } - } else if (proc_gen == proc_gen_p9) { + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { sreset_enabled = enabled; sync(); /* @@ -676,7 +679,7 @@ void cpu_set_ipi_enable(bool enabled) pm_enabled = true; } - } else if (proc_gen == proc_gen_p9) { + } else if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) { ipi_enabled = enabled; sync(); if (!enabled) @@ -1014,6 +1017,13 @@ void init_boot_cpu(void) hid0_hile = SPR_HID0_POWER9_HILE; hid0_attn = SPR_HID0_POWER9_ENABLE_ATTN; break; + case PVR_TYPE_P10: + proc_gen = proc_gen_p10; + hile_supported = true; + radix_supported = true; + hid0_hile = SPR_HID0_POWER10_HILE; + hid0_attn = SPR_HID0_POWER10_ENABLE_ATTN; + break; default: proc_gen = proc_gen_unknown; } @@ -1033,6 +1043,14 @@ void init_boot_cpu(void) prlog(PR_INFO, "CPU: P9 generation processor" " (max %d threads/core)\n", cpu_thread_count); break; + case proc_gen_p10: + if (is_fused_core(pvr)) + cpu_thread_count = 8; + else + cpu_thread_count = 4; + prlog(PR_INFO, "CPU: P10 generation processor" + " (max %d threads/core)\n", cpu_thread_count); + break; default: prerror("CPU: Unknown PVR, assuming 1 thread\n"); cpu_thread_count = 1; @@ -1535,7 +1553,8 @@ void cpu_fast_reboot_complete(void) current_hile_mode = HAVE_LITTLE_ENDIAN; /* and set HID0:RADIX */ - current_radix_mode = true; + if (proc_gen == proc_gen_p9) + current_radix_mode = true; } static int64_t opal_reinit_cpus(uint64_t flags) @@ -1616,7 +1635,8 @@ static int64_t opal_reinit_cpus(uint64_t flags) flags &= ~(OPAL_REINIT_CPUS_MMU_HASH | OPAL_REINIT_CPUS_MMU_RADIX); - if (radix != current_radix_mode) { + + if (proc_gen == proc_gen_p9 && radix != current_radix_mode) { if (radix) req.set_bits |= SPR_HID0_POWER9_RADIX; else diff --git a/core/direct-controls.c b/core/direct-controls.c index 0274367..f7509dd 100644 --- a/core/direct-controls.c +++ b/core/direct-controls.c @@ -12,6 +12,7 @@ #include <xscom.h> #include <xscom-p8-regs.h> #include <xscom-p9-regs.h> +#include <xscom-p10-regs.h> #include <timebase.h> #include <chip.h> @@ -268,6 +269,25 @@ static int p8_sreset_thread(struct cpu_thread *cpu) * using scom registers. */ +static int p9_core_is_gated(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t sshhyp_addr; + uint64_t val; + + sshhyp_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, P9_EC_PPM_SSHHYP); + + if (xscom_read(chip_id, sshhyp_addr, &val)) { + prlog(PR_ERR, "Could not query core gated on %u:%u:" + " Unable to read PPM_SSHHYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + return !!(val & P9_CORE_GATED); +} + static int p9_core_set_special_wakeup(struct cpu_thread *cpu) { uint32_t chip_id = pir_to_chip_id(cpu->pir); @@ -301,7 +321,7 @@ static int p9_core_set_special_wakeup(struct cpu_thread *cpu) * out of stop state. If CORE_GATED is still set then * raise error. */ - if (dctl_core_is_gated(cpu)) { + if (p9_core_is_gated(cpu)) { /* Deassert spwu for this strange error */ xscom_write(chip_id, swake_addr, 0); prlog(PR_ERR, "Failed special wakeup on %u:%u" @@ -517,6 +537,295 @@ static int p9_sreset_thread(struct cpu_thread *cpu) return 0; } +/**************** POWER10 direct controls ****************/ + +/* Long running instructions may take time to complete. Timeout 100ms */ +#define P10_QUIESCE_POLL_INTERVAL 100 +#define P10_QUIESCE_TIMEOUT 100000 + +/* Waking may take up to 5ms for deepest sleep states. Set timeout to 100ms */ +#define P10_SPWU_POLL_INTERVAL 100 +#define P10_SPWU_TIMEOUT 100000 + +/* + * This implements direct control facilities of processor cores and threads + * using scom registers. + */ +static int p10_core_is_gated(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t ssh_addr; + uint64_t val; + + ssh_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SSH_HYP); + + if (xscom_read(chip_id, ssh_addr, &val)) { + prlog(PR_ERR, "Could not query core gated on %u:%u:" + " Unable to read QME_SSH_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + return !!(val & P10_SSH_CORE_GATED); +} + + +static int p10_core_set_special_wakeup(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t spwu_addr, ssh_addr; + uint64_t val; + int i; + + /* P10 could use SPWU_HYP done bit instead of SSH? */ + spwu_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SPWU_HYP); + ssh_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SSH_HYP); + + if (xscom_write(chip_id, spwu_addr, P10_SPWU_REQ)) { + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " Unable to write QME_SPWU_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + for (i = 0; i < P10_SPWU_TIMEOUT / P10_SPWU_POLL_INTERVAL; i++) { + if (xscom_read(chip_id, ssh_addr, &val)) { + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " Unable to read QME_SSH_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + if (val & P10_SSH_SPWU_DONE) { + /* + * CORE_GATED will be unset on a successful special + * wakeup of the core which indicates that the core is + * out of stop state. If CORE_GATED is still set then + * raise error. + */ + if (p10_core_is_gated(cpu)) { + /* Deassert spwu for this strange error */ + xscom_write(chip_id, spwu_addr, 0); + prlog(PR_ERR, "Failed special wakeup on %u:%u" + " core remains gated.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } else { + return 0; + } + } + time_wait_us(P10_SPWU_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not set special wakeup on %u:%u:" + " operation timeout.\n", + chip_id, core_id); + /* + * As per the special wakeup protocol we should not de-assert + * the special wakeup on the core until WAKEUP_DONE is set. + * So even on error do not de-assert. + */ + + return OPAL_HARDWARE; +} + +static int p10_core_clear_special_wakeup(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t spwu_addr; + + spwu_addr = XSCOM_ADDR_P10_QME_CORE(core_id, P10_QME_SPWU_HYP); + + /* Add a small delay here if spwu problems time_wait_us(1); */ + if (xscom_write(chip_id, spwu_addr, 0)) { + prlog(PR_ERR, "Could not clear special wakeup on %u:%u:" + " Unable to write QME_SPWU_HYP.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + return 0; +} + +static int p10_thread_quiesced(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t ras_addr; + uint64_t ras_status; + + ras_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_RAS_STATUS); + if (xscom_read(chip_id, ras_addr, &ras_status)) { + prlog(PR_ERR, "Could not check thread state on %u:%u:" + " Unable to read EC_RAS_STATUS.\n", + chip_id, core_id); + return OPAL_HARDWARE; + } + + /* + * p10_thread_stop for the purpose of sreset wants QUIESCED + * and MAINT bits set. Step, RAM, etc. need more, but we don't + * use those in skiboot. + * + * P10 could try wait for more here in case of errors. + */ + if (!(ras_status & P10_THREAD_QUIESCED(thread_id))) + return 0; + + if (!(ras_status & P10_THREAD_MAINT(thread_id))) + return 0; + + return 1; +} + +static int p10_cont_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t cts_addr; + uint32_t ti_addr; + uint32_t dctl_addr; + uint64_t core_thread_state; + uint64_t thread_info; + bool active, stop; + int rc; + int i; + + rc = p10_thread_quiesced(cpu); + if (rc < 0) + return rc; + if (!rc) { + prlog(PR_ERR, "Could not cont thread %u:%u:%u:" + " Thread is not quiesced.\n", + chip_id, core_id, thread_id); + return OPAL_BUSY; + } + + cts_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_CORE_THREAD_STATE); + ti_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_THREAD_INFO); + dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS); + + if (xscom_read(chip_id, cts_addr, &core_thread_state)) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to read EC_CORE_THREAD_STATE.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + if (core_thread_state & P10_THREAD_STOPPED(thread_id)) + stop = true; + else + stop = false; + + if (xscom_read(chip_id, ti_addr, &thread_info)) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to read EC_THREAD_INFO.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + if (thread_info & P10_THREAD_ACTIVE(thread_id)) + active = true; + else + active = false; + + if (!active || stop) { + if (xscom_write(chip_id, dctl_addr, P10_THREAD_CLEAR_MAINT(thread_id))) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + } + } else { + if (xscom_write(chip_id, dctl_addr, P10_THREAD_START(thread_id))) { + prlog(PR_ERR, "Could not resume thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + } + } + + for (i = 0; i < P10_QUIESCE_TIMEOUT / P10_QUIESCE_POLL_INTERVAL; i++) { + int rc = p10_thread_quiesced(cpu); + if (rc < 0) + break; + if (!rc) + return 0; + + time_wait_us(P10_QUIESCE_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not start thread %u:%u:%u:" + " Unable to start thread.\n", + chip_id, core_id, thread_id); + + return OPAL_HARDWARE; +} + +static int p10_stop_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t dctl_addr; + int rc; + int i; + + dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS); + + rc = p10_thread_quiesced(cpu); + if (rc < 0) + return rc; + if (rc) { + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Thread is quiesced already.\n", + chip_id, core_id, thread_id); + return OPAL_BUSY; + } + + if (xscom_write(chip_id, dctl_addr, P10_THREAD_STOP(thread_id))) { + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + for (i = 0; i < P10_QUIESCE_TIMEOUT / P10_QUIESCE_POLL_INTERVAL; i++) { + int rc = p10_thread_quiesced(cpu); + if (rc < 0) + break; + if (rc) + return 0; + + time_wait_us(P10_QUIESCE_POLL_INTERVAL); + } + + prlog(PR_ERR, "Could not stop thread %u:%u:%u:" + " Unable to quiesce thread.\n", + chip_id, core_id, thread_id); + + return OPAL_HARDWARE; +} + +static int p10_sreset_thread(struct cpu_thread *cpu) +{ + uint32_t chip_id = pir_to_chip_id(cpu->pir); + uint32_t core_id = pir_to_core_id(cpu->pir); + uint32_t thread_id = pir_to_thread_id(cpu->pir); + uint32_t dctl_addr; + + dctl_addr = XSCOM_ADDR_P10_EC(core_id, P10_EC_DIRECT_CONTROLS); + + if (xscom_write(chip_id, dctl_addr, P10_THREAD_SRESET(thread_id))) { + prlog(PR_ERR, "Could not sreset thread %u:%u:%u:" + " Unable to write EC_DIRECT_CONTROLS.\n", + chip_id, core_id, thread_id); + return OPAL_HARDWARE; + } + + return 0; +} + /**************** generic direct controls ****************/ int dctl_set_special_wakeup(struct cpu_thread *t) @@ -529,7 +838,9 @@ int dctl_set_special_wakeup(struct cpu_thread *t) lock(&c->dctl_lock); if (c->special_wakeup_count == 0) { - if (proc_gen == proc_gen_p9) + if (proc_gen == proc_gen_p10) + rc = p10_core_set_special_wakeup(c); + else if (proc_gen == proc_gen_p9) rc = p9_core_set_special_wakeup(c); else /* (proc_gen == proc_gen_p8) */ rc = p8_core_set_special_wakeup(c); @@ -553,7 +864,9 @@ int dctl_clear_special_wakeup(struct cpu_thread *t) if (!c->special_wakeup_count) goto out; if (c->special_wakeup_count == 1) { - if (proc_gen == proc_gen_p9) + if (proc_gen == proc_gen_p10) + rc = p10_core_clear_special_wakeup(c); + else if (proc_gen == proc_gen_p9) rc = p9_core_clear_special_wakeup(c); else /* (proc_gen == proc_gen_p8) */ rc = p8_core_clear_special_wakeup(c); @@ -569,24 +882,13 @@ out: int dctl_core_is_gated(struct cpu_thread *t) { struct cpu_thread *c = t->primary; - uint32_t chip_id = pir_to_chip_id(c->pir); - uint32_t core_id = pir_to_core_id(c->pir); - uint32_t sshhyp_addr; - uint64_t val; - if (proc_gen != proc_gen_p9) + if (proc_gen == proc_gen_p10) + return p10_core_is_gated(c); + else if (proc_gen == proc_gen_p9) + return p9_core_is_gated(c); + else return OPAL_UNSUPPORTED; - - sshhyp_addr = XSCOM_ADDR_P9_EC_SLAVE(core_id, P9_EC_PPM_SSHHYP); - - if (xscom_read(chip_id, sshhyp_addr, &val)) { - prlog(PR_ERR, "Could not query core gated on %u:%u:" - " Unable to read PPM_SSHHYP.\n", - chip_id, core_id); - return OPAL_HARDWARE; - } - - return !!(val & P9_CORE_GATED); } static int dctl_stop(struct cpu_thread *t) @@ -599,7 +901,9 @@ static int dctl_stop(struct cpu_thread *t) unlock(&c->dctl_lock); return OPAL_BUSY; } - if (proc_gen == proc_gen_p9) + if (proc_gen == proc_gen_p10) + rc = p10_stop_thread(t); + else if (proc_gen == proc_gen_p9) rc = p9_stop_thread(t); else /* (proc_gen == proc_gen_p8) */ rc = p8_stop_thread(t); @@ -615,7 +919,7 @@ static int dctl_cont(struct cpu_thread *t) struct cpu_thread *c = t->primary; int rc; - if (proc_gen != proc_gen_p9) + if (proc_gen != proc_gen_p10 && proc_gen != proc_gen_p9) return OPAL_UNSUPPORTED; lock(&c->dctl_lock); @@ -623,7 +927,10 @@ static int dctl_cont(struct cpu_thread *t) unlock(&c->dctl_lock); return OPAL_BUSY; } - rc = p9_cont_thread(t); + if (proc_gen == proc_gen_p10) + rc = p10_cont_thread(t); + else /* (proc_gen == proc_gen_p9) */ + rc = p9_cont_thread(t); if (!rc) t->dctl_stopped = false; unlock(&c->dctl_lock); @@ -647,7 +954,9 @@ static int dctl_sreset(struct cpu_thread *t) unlock(&c->dctl_lock); return OPAL_BUSY; } - if (proc_gen == proc_gen_p9) + if (proc_gen == proc_gen_p10) + rc = p10_sreset_thread(t); + else if (proc_gen == proc_gen_p9) rc = p9_sreset_thread(t); else /* (proc_gen == proc_gen_p8) */ rc = p8_sreset_thread(t); @@ -752,7 +1061,7 @@ int sreset_all_others(void) * Then sreset the target thread, which resumes execution on that thread. * Then de-assert special wakeup on the core. */ -static int64_t p9_sreset_cpu(struct cpu_thread *cpu) +static int64_t do_sreset_cpu(struct cpu_thread *cpu) { int rc; @@ -792,7 +1101,7 @@ int64_t opal_signal_system_reset(int cpu_nr) struct cpu_thread *cpu; int64_t ret; - if (proc_gen != proc_gen_p9) + if (proc_gen != proc_gen_p9 && proc_gen != proc_gen_p10) return OPAL_UNSUPPORTED; /* @@ -811,7 +1120,7 @@ int64_t opal_signal_system_reset(int cpu_nr) } lock(&sreset_lock); - ret = p9_sreset_cpu(cpu); + ret = do_sreset_cpu(cpu); unlock(&sreset_lock); return ret; @@ -822,7 +1131,7 @@ void direct_controls_init(void) if (chip_quirk(QUIRK_MAMBO_CALLOUTS)) return; - if (proc_gen != proc_gen_p9) + if (proc_gen != proc_gen_p9 && proc_gen != proc_gen_p10) return; opal_register(OPAL_SIGNAL_SYSTEM_RESET, opal_signal_system_reset, 1); @@ -15,6 +15,7 @@ #include <xscom.h> #include <xscom-p8-regs.h> #include <xscom-p9-regs.h> +#include <xscom-p10-regs.h> #include <pci.h> #include <cpu.h> #include <chip.h> @@ -27,7 +28,7 @@ #include <cpu.h> /* - * HMER register layout: + * P9 HMER register layout: * +===+==========+============================+========+===================+ * |Bit|Name |Description |PowerKVM|Action | * | | | |HMI | | @@ -147,6 +148,78 @@ * NOTE: Per Dave Larson, never enable 8,9,21-23 */ +/* + * P10 HMER register layout: + * Bit Name Description + * 0 malfunction_alert A processor core in the system has checkstopped + * (failed recovery). This is broadcasted to every + * processor in the system + * + * 1 reserved reserved + * + * 2 proc_rcvy_done Processor recovery occurred error-bit in fir not + * masked (see bit 11) + * + * 3 reserved reserved + * + * 4 tfac_error Timer facility experienced an error. TB, DEC, + * HDEC, PURR or SPURR may be corrupted (details in + * TFMR) + * + * 5 tfx_error Error occurred on transfer from tfac shadow to + * core + * + * 6 spurr_scale_limit Nominal frequency exceeded 399 percent + * + * 7 reserved reserved + * + * 8 xscom_fail An XSCOM operation caused by a cache inhibited + * load/store from this thread failed. A trap + * register is available. + * + * 9 xscom_done An XSCOM operation caused by a cache inhibited + * load/store from this thread completed. If + * hypervisor intends to use this bit, it is + * responsible for clearing it before performing the + * xscom operation. NOTE: this bit should always be + * masked in HMEER + * + * 10 reserved reserved + * + * 11 proc_rcvy_again Processor recovery occurred again before bit 2 + * was cleared + * + * 12-15 reserved reserved + * + * 16 scom_fir_hmi An error inject to PC FIR has occurred to set HMI. + * This error inject can also set FIR(61) to cause + * recovery. + * + * 17 reserved reserved + * + * 18 trig_fir_hmi Debug trigger has occurred to set HMI. This + * trigger can also set FIR(60) to cause recovery + * + * 19-20 reserved reserved + * + * 21-23 xscom_status If bit 8 is active, the reason will be detailed in + * these bits. These bits are information only and + * always masked (mask = ‘0’) If hypervisor intends + * to use this field, it is responsible for clearing + * it before performing the xscom operation. + * + * 24:63 Not implemented Not implemented. + * + * P10 HMEER enabled bits: + * Name Action + * malfunction_alert Decode and log FIR bits. + * proc_rcvy_done Log and continue. + * tfac_error Log and attempt to recover time facilities. + * tfx_error Log and attempt to recover time facilities. + * spurr_scale_limit Log and continue. XXX? + * proc_rcvy_again Log and continue. + */ + /* Used for tracking cpu threads inside hmi handling. */ #define HMI_STATE_CLEANUP_DONE 0x100 #define CORE_THREAD_MASK 0x0ff @@ -174,13 +247,17 @@ (SPR_TFMR_TBST_CORRUPT | SPR_TFMR_TB_MISSING_SYNC | \ SPR_TFMR_TB_MISSING_STEP | SPR_TFMR_FW_CONTROL_ERR | \ SPR_TFMR_TFMR_CORRUPT | SPR_TFMR_TB_RESIDUE_ERR | \ - SPR_TFMR_HDEC_PARITY_ERROR) + SPR_TFMR_HDEC_PARITY_ERROR | SPR_TFMR_TFAC_XFER_ERROR) /* TFMR "thread" errors */ #define SPR_TFMR_THREAD_ERRORS \ (SPR_TFMR_PURR_PARITY_ERR | SPR_TFMR_SPURR_PARITY_ERR | \ SPR_TFMR_DEC_PARITY_ERR) +/* + * Starting from p9, core inits are setup to escalate all core + * local checkstop to system checkstop. Review this list when that changes. + */ static const struct core_xstop_bit_info { uint8_t bit; /* CORE FIR bit number */ enum OpalHMI_CoreXstopReason reason; @@ -203,10 +280,12 @@ static const struct core_xstop_bit_info { { 63, CORE_CHECKSTOP_PC_SPRD_HYP_ERR_INJ }, }; -static const struct core_recoverable_bit_info { +struct core_fir_bit_info { uint8_t bit; /* CORE FIR bit number */ const char *reason; -} recoverable_bits[] = { +}; + +static const struct core_fir_bit_info p9_recoverable_bits[] = { { 0, "IFU - SRAM (ICACHE parity, etc)" }, { 2, "IFU - RegFile" }, { 4, "IFU - Logic" }, @@ -226,6 +305,58 @@ static const struct core_recoverable_bit_info { { 43, "PC - Thread hang recovery" }, }; +static const struct core_fir_bit_info p10_core_fir_bits[] = { + { 0, "IFU - SRAM recoverable error (ICACHE parity error, etc.)" }, + { 1, "PC - TC checkstop" }, + { 2, "IFU - RegFile recoverable error" }, + { 3, "IFU - RegFile core checkstop" }, + { 4, "IFU - Logic recoverable error" }, + { 5, "IFU - Logic core checkstop" }, + { 7, "VSU - Inference accumulator recoverable error" }, + { 8, "PC - Recovery core checkstop" }, + { 9, "VSU - Slice Target File (STF) recoverable error" }, + { 11, "ISU - Logic recoverable error" }, + { 12, "ISU - Logic core checkstop" }, + { 14, "ISU - Machine check received while ME=0 checkstop" }, + { 15, "ISU - UE from L2" }, + { 16, "ISU - Number of UEs from L2 above threshold" }, + { 17, "ISU - UE on CI load" }, + { 18, "MMU - TLB recoverable error" }, + { 19, "MMU - SLB error" }, + { 21, "MMU - CXT recoverable error" }, + { 22, "MMU - Logic core checkstop" }, + { 23, "MMU - MMU system checkstop" }, + { 24, "VSU - Logic recoverable error" }, + { 25, "VSU - Logic core checkstop" }, + { 26, "PC - In maint mode and recovery in progress" }, + { 28, "PC - PC system checkstop" }, + { 29, "LSU - SRAM recoverable error (DCACHE parity error, etc.)" }, + { 30, "LSU - Set deleted" }, + { 31, "LSU - RegFile recoverable error" }, + { 32, "LSU - RegFile core checkstop" }, + { 33, "MMU - TLB multi hit error occurred" }, + { 34, "MMU - SLB multi hit error occurred" }, + { 35, "LSU - ERAT multi hit error occurred" }, + { 36, "PC - Forward progress error" }, + { 37, "LSU - Logic recoverable error" }, + { 38, "LSU - Logic core checkstop" }, + { 41, "LSU - System checkstop" }, + { 43, "PC - Thread hang recoverable error" }, + { 45, "PC - Logic core checkstop" }, + { 47, "PC - TimeBase facility checkstop" }, + { 52, "PC - Hang recovery failed core checkstop" }, + { 53, "PC - Core internal hang detected" }, + { 55, "PC - Nest hang detected" }, + { 56, "PC - Other core chiplet recoverable error" }, + { 57, "PC - Other core chiplet core checkstop" }, + { 58, "PC - Other core chiplet system checkstop" }, + { 59, "PC - SCOM satellite error detected" }, + { 60, "PC - Debug trigger error inject" }, + { 61, "PC - SCOM or firmware recoverable error inject" }, + { 62, "PC - Firmware checkstop error inject" }, + { 63, "PC - Firmware SPRC / SPRD checkstop" }, +}; + static const struct nx_xstop_bit_info { uint8_t bit; /* NX FIR bit number */ enum OpalHMI_NestAccelXstopReason reason; @@ -270,6 +401,12 @@ static int setup_scom_addresses(void) nx_dma_engine_fir = P9_NX_DMA_ENGINE_FIR; nx_pbi_fir = P9_NX_PBI_FIR; return 1; + case proc_gen_p10: + malf_alert_scom = P10_MALFUNC_ALERT; + nx_status_reg = P10_NX_STATUS_REG; + nx_dma_engine_fir = P10_NX_DMA_ENGINE_FIR; + nx_pbi_fir = P10_NX_PBI_FIR; + return 1; default: prerror("%s: Unknown CPU type\n", __func__); break; @@ -320,6 +457,10 @@ static int read_core_fir(uint32_t chip_id, uint32_t core_id, uint64_t *core_fir) rc = xscom_read(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_CORE_FIR), core_fir); break; + case proc_gen_p10: + rc = xscom_read(chip_id, + XSCOM_ADDR_P10_EC(core_id, P10_CORE_FIR), core_fir); + break; default: rc = OPAL_HARDWARE; } @@ -335,6 +476,10 @@ static int read_core_wof(uint32_t chip_id, uint32_t core_id, uint64_t *core_wof) rc = xscom_read(chip_id, XSCOM_ADDR_P9_EC(core_id, P9_CORE_WOF), core_wof); break; + case proc_gen_p10: + rc = xscom_read(chip_id, + XSCOM_ADDR_P10_EC(core_id, P10_CORE_WOF), core_wof); + break; default: rc = OPAL_HARDWARE; } @@ -394,6 +539,13 @@ static bool decode_core_fir(struct cpu_thread *cpu, loc ? loc : "Not Available", cpu->chip_id, core_id, core_fir); + if (proc_gen == proc_gen_p10) { + for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) { + if (core_fir & PPC_BIT(p10_core_fir_bits[i].bit)) + prlog(PR_INFO, " %s\n", p10_core_fir_bits[i].reason); + } + } + /* Check CORE FIR bits and populate HMI event with error info. */ for (i = 0; i < ARRAY_SIZE(xstop_bits); i++) { if (core_fir & PPC_BIT(xstop_bits[i].bit)) { @@ -910,6 +1062,7 @@ static void hmi_print_debug(const uint8_t *msg, uint64_t hmer) if (!loc) loc = "Not Available"; + /* Also covers P10 SPR_HMER_TFAC_SHADOW_XFER_ERROR */ if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) { prlog(PR_DEBUG, "[Loc: %s]: P:%d C:%d T:%d: TFMR(%016lx) %s\n", loc, this_cpu()->chip_id, core_id, thread_index, @@ -1231,10 +1384,16 @@ static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt, int i; prlog(PR_DEBUG, "Core WOF = 0x%016llx recovered error:\n", core_wof); - for (i = 0; i < ARRAY_SIZE(recoverable_bits); i++) { - if (core_wof & PPC_BIT(recoverable_bits[i].bit)) - prlog(PR_DEBUG, "%s\n", - recoverable_bits[i].reason); + if (proc_gen <= proc_gen_p9) { + for (i = 0; i < ARRAY_SIZE(p9_recoverable_bits); i++) { + if (core_wof & PPC_BIT(p9_recoverable_bits[i].bit)) + prlog(PR_DEBUG, " %s\n", p9_recoverable_bits[i].reason); + } + } else if (proc_gen == proc_gen_p10) { + for (i = 0; i < ARRAY_SIZE(p10_core_fir_bits); i++) { + if (core_wof & PPC_BIT(p10_core_fir_bits[i].bit)) + prlog(PR_DEBUG, " %s\n", p10_core_fir_bits[i].reason); + } } } @@ -1245,7 +1404,8 @@ static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt, queue_hmi_event(hmi_evt, recover, out_flags); } } - if (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED) { + + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_PROC_RECV_ERROR_MASKED)) { handled |= SPR_HMER_PROC_RECV_ERROR_MASKED; if (cpu_is_thread0(cpu) && hmi_evt) { hmi_evt->severity = OpalHMI_SEV_NO_ERROR; @@ -1254,6 +1414,7 @@ static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt, } hmi_print_debug("Processor recovery Done (masked).", hmer); } + if (hmer & SPR_HMER_PROC_RECV_AGAIN) { handled |= SPR_HMER_PROC_RECV_AGAIN; if (cpu_is_thread0(cpu) && hmi_evt) { @@ -1264,17 +1425,30 @@ static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt, hmi_print_debug("Processor recovery occurred again before" "bit2 was cleared\n", hmer); } + + /* XXX: what to do with this? */ + if (hmer & SPR_HMER_SPURR_SCALE_LIMIT) { + handled |= SPR_HMER_SPURR_SCALE_LIMIT; + if (cpu_is_thread0(cpu) && hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE; + queue_hmi_event(hmi_evt, recover, out_flags); + } + hmi_print_debug("Turbo versus nominal frequency exceeded limit.", hmer); + } + /* Assert if we see malfunction alert, we can not continue. */ if (hmer & SPR_HMER_MALFUNCTION_ALERT) { handled |= SPR_HMER_MALFUNCTION_ALERT; hmi_print_debug("Malfunction Alert", hmer); + recover = 0; if (hmi_evt) decode_malfunction(hmi_evt, out_flags); } /* Assert if we see Hypervisor resource error, we can not continue. */ - if (hmer & SPR_HMER_HYP_RESOURCE_ERR) { + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_HYP_RESOURCE_ERR)) { handled |= SPR_HMER_HYP_RESOURCE_ERR; hmi_print_debug("Hypervisor resource error", hmer); @@ -1285,7 +1459,21 @@ static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt, queue_hmi_event(hmi_evt, recover, out_flags); } } - if (hmer & SPR_HMER_TRIG_FIR_HMI) { + + /* XXX: what to do with this? */ + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND)) { + handled |= SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND; + hmer &= ~SPR_HMER_THD_WAKE_BLOCKED_TM_SUSPEND; + + hmi_print_debug("Attempted to wake thread when threads in TM suspend mode.", hmer); + if (hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_PROC_RECOV_DONE, + queue_hmi_event(hmi_evt, recover, out_flags); + } + } + + if ((proc_gen <= proc_gen_p9) && (hmer & SPR_HMER_TRIG_FIR_HMI)) { handled |= SPR_HMER_TRIG_FIR_HMI; hmer &= ~SPR_HMER_TRIG_FIR_HMI; @@ -1296,6 +1484,17 @@ static int handle_hmi_exception(uint64_t hmer, struct OpalHMIEvent *hmi_evt, queue_hmi_event(hmi_evt, recover, out_flags); } } + if ((proc_gen == proc_gen_p10) && (hmer & SPR_HMER_P10_TRIG_FIR_HMI)) { + handled |= SPR_HMER_P10_TRIG_FIR_HMI; + hmer &= ~SPR_HMER_P10_TRIG_FIR_HMI; + + hmi_print_debug("Clearing unknown debug trigger", hmer); + if (hmi_evt) { + hmi_evt->severity = OpalHMI_SEV_NO_ERROR; + hmi_evt->type = OpalHMI_ERROR_DEBUG_TRIG_FIR, + queue_hmi_event(hmi_evt, recover, out_flags); + } + } if (recover == 0) disable_fast_reboot("Unrecoverable HMI"); diff --git a/core/init.c b/core/init.c index 09749f4..65f136d 100644 --- a/core/init.c +++ b/core/init.c @@ -1167,7 +1167,7 @@ void __noreturn __nomcount main_cpu_entry(const void *fdt) /* Initialize the rest of the cpu thread structs */ init_all_cpus(); - if (proc_gen == proc_gen_p9) + if (proc_gen == proc_gen_p9 || proc_gen == proc_gen_p10) cpu_set_ipi_enable(true); /* Add the /opal node to the device-tree */ @@ -65,6 +65,42 @@ static const struct mce_ierror_table mce_p9_ierror_table[] = { "instruction fetch page table access to foreign address", }, { 0 } }; +static const struct mce_ierror_table mce_p10_ierror_table[] = { +{ 0x00000000081c0000, 0x0000000000040000, + MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_INVOLVED_EA, + "instruction fetch memory uncorrectable error", }, +{ 0x00000000081c0000, 0x0000000000080000, + MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA, + "instruction fetch SLB parity error", }, +{ 0x00000000081c0000, 0x00000000000c0000, + MCE_INSNFETCH | MCE_SLB_ERROR | MCE_INVOLVED_EA, + "instruction fetch SLB multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000100000, + MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_ERAT_ERROR, + "instruction fetch ERAT multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000140000, + MCE_INSNFETCH | MCE_INVOLVED_EA | MCE_TLB_ERROR, + "instruction fetch TLB multi-hit error", }, +{ 0x00000000081c0000, 0x0000000000180000, + MCE_INSNFETCH | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access memory uncorrectable error", }, +{ 0x00000000081c0000, 0x00000000001c0000, + MCE_INSNFETCH | MCE_INVOLVED_EA, + "instruction fetch to control real address", }, +{ 0x00000000081c0000, 0x00000000080c0000, + MCE_INSNFETCH | MCE_INVOLVED_EA, + "instruction fetch real address error", }, +{ 0x00000000081c0000, 0x0000000008100000, + MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access real address error", }, +{ 0x00000000081c0000, 0x0000000008140000, + MCE_LOADSTORE | MCE_IMPRECISE, + "store real address asynchronous error", }, +{ 0x00000000081c0000, 0x00000000081c0000, + MCE_INSNFETCH | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "instruction fetch page table access to control real address", }, +{ 0 } }; + struct mce_derror_table { unsigned long dsisr_value; uint64_t type; @@ -113,6 +149,42 @@ static const struct mce_derror_table mce_p9_derror_table[] = { "load/store to foreign address", }, { 0 } }; +static const struct mce_derror_table mce_p10_derror_table[] = { +{ 0x00008000, + MCE_LOADSTORE | MCE_MEMORY_ERROR, + "load/store memory uncorrectable error", }, +{ 0x00004000, + MCE_LOADSTORE | MCE_MEMORY_ERROR | MCE_TABLE_WALK | MCE_INVOLVED_EA, + "load/store page table access memory uncorrectable error", }, +{ 0x00000800, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_ERAT_ERROR, + "load/store ERAT multi-hit error", }, +{ 0x00000400, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_TLB_ERROR, + "load/store TLB multi-hit error", }, +{ 0x00000200, + MCE_TLBIE_ERROR, + "TLBIE or TLBIEL instruction programming error", }, +{ 0x00000100, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR, + "load/store SLB parity error", }, +{ 0x00000080, + MCE_LOADSTORE | MCE_INVOLVED_EA | MCE_SLB_ERROR, + "load/store SLB multi-hit error", }, +{ 0x00000040, + MCE_LOADSTORE | MCE_INVOLVED_EA, + "load real address error", }, +{ 0x00000020, + MCE_LOADSTORE | MCE_TABLE_WALK, + "load/store page table access real address error", }, +{ 0x00000010, + MCE_LOADSTORE | MCE_TABLE_WALK, + "load/store page table access to control real address", }, +{ 0x00000008, + MCE_LOADSTORE, + "load/store to control real address", }, +{ 0 } }; + static void decode_ierror(const struct mce_ierror_table table[], uint64_t srr1, uint64_t *type, @@ -145,20 +217,11 @@ static void decode_derror(const struct mce_derror_table table[], } } -void decode_mce(uint64_t srr0, uint64_t srr1, +static void decode_mce_p9(uint64_t srr0, uint64_t srr1, uint32_t dsisr, uint64_t dar, uint64_t *type, const char **error_str, uint64_t *address) { - *type = MCE_UNKNOWN; - *error_str = "unknown error"; - *address = 0; - - if (proc_gen != proc_gen_p9) { - *error_str = "unknown error (processor not supported)"; - return; - } - /* * On POWER9 DD2.1 and below, it's possible to get a machine check * caused by a paste instruction where only DSISR bit 25 is set. This @@ -198,3 +261,49 @@ void decode_mce(uint64_t srr0, uint64_t srr1, *address = srr0; } } + +static void decode_mce_p10(uint64_t srr0, uint64_t srr1, + uint32_t dsisr, uint64_t dar, + uint64_t *type, const char **error_str, + uint64_t *address) +{ + /* + * Async machine check due to bad real address from store or foreign + * link time out comes with the load/store bit (PPC bit 42) set in + * SRR1, but the cause comes in SRR1 not DSISR. Clear bit 42 so we're + * directed to the ierror table so it will find the cause (which + * describes it correctly as a store error). + */ + if (SRR1_MC_LOADSTORE(srr1) && + (srr1 & 0x081c0000) == 0x08140000) { + srr1 &= ~PPC_BIT(42); + } + + if (SRR1_MC_LOADSTORE(srr1)) { + decode_derror(mce_p10_derror_table, dsisr, type, error_str); + if (*type & MCE_INVOLVED_EA) + *address = dar; + } else { + decode_ierror(mce_p10_ierror_table, srr1, type, error_str); + if (*type & MCE_INVOLVED_EA) + *address = srr0; + } +} + +void decode_mce(uint64_t srr0, uint64_t srr1, + uint32_t dsisr, uint64_t dar, + uint64_t *type, const char **error_str, + uint64_t *address) +{ + *type = MCE_UNKNOWN; + *error_str = "unknown error"; + *address = 0; + + if (proc_gen == proc_gen_p9) { + decode_mce_p9(srr0, srr1, dsisr, dar, type, error_str, address); + } else if (proc_gen == proc_gen_p10) { + decode_mce_p10(srr0, srr1, dsisr, dar, type, error_str, address); + } else { + *error_str = "unknown error (processor not supported)"; + } +} diff --git a/core/test/run-timer.c b/core/test/run-timer.c index fef5648..8f8b20e 100644 --- a/core/test/run-timer.c +++ b/core/test/run-timer.c @@ -16,7 +16,7 @@ #define smt_lowest() #define smt_medium() -enum proc_gen proc_gen = proc_gen_p9; +enum proc_gen proc_gen = proc_gen_unknown; static uint64_t stamp, last; struct lock; |