-rw-r--r-- | core/init.c        |    9 |
-rw-r--r-- | hw/Makefile.inc    |    2 |
-rw-r--r-- | hw/xive.c          | 2003 |
-rw-r--r-- | include/chip.h     |    4 |
-rw-r--r-- | include/cpu.h      |    4 |
-rw-r--r-- | include/opal-api.h |    8 |
-rw-r--r-- | include/xive.h     |  378 |
7 files changed, 2402 insertions, 6 deletions
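The global interrupt numbering this patch introduces packs a 4-bit block number (one block per chip for now) and a 20-bit per-block index into the low 24 bits of the IRQ number, leaving the top 8 bits of an XIRR free for the CPPR in the XICS emulation. Below is a minimal standalone sketch of the encoding round-trip; the three macros mirror GIRQ_TO_BLK, GIRQ_TO_IDX and BLKIDX_TO_GIRQ from hw/xive.c in this patch, while the assert-based harness is purely illustrative and not part of the patch:

	#include <assert.h>
	#include <stdint.h>

	/* Mirrors the GIRQ layout from hw/xive.c: |8 zero bits|4-bit block|20-bit index| */
	#define GIRQ_TO_BLK(__g)	(((__g) >> 20) & 0xf)
	#define GIRQ_TO_IDX(__g)	((__g) & 0x000fffff)
	#define BLKIDX_TO_GIRQ(__b,__i)	(((uint32_t)(__b)) << 20 | (__i))

	int main(void)
	{
		/* Interrupt index 0x1234 in block (= chip) 3 */
		uint32_t girq = BLKIDX_TO_GIRQ(3, 0x1234);

		assert(girq == 0x00301234);          /* fits in 24 bits; top byte stays free for CPPR */
		assert(GIRQ_TO_BLK(girq) == 3);
		assert(GIRQ_TO_IDX(girq) == 0x1234);
		return 0;
	}

This layout is also why the patch can carve both HW sources and IPIs out of a single per-chip range (int_hw_bot growing down, int_ipi_top growing up): within a chip the block bits never change, so the allocators only move the 20-bit index.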
diff --git a/core/init.c b/core/init.c index 48f5322..d3cc7a6 100644 --- a/core/init.c +++ b/core/init.c @@ -43,6 +43,7 @@ #include <timer.h> #include <ipmi.h> #include <sensor.h> +#include <xive.h> enum proc_gen proc_gen; @@ -711,9 +712,12 @@ void __noreturn main_cpu_entry(const void *fdt, u32 master_cpu) /* Allocate our split trace buffers now. Depends add_opal_node() */ init_trace_buffers(); - /* Get the ICPs and make sure they are in a sane state */ + /* On P7/P8, get the ICPs and make sure they are in a sane state */ init_interrupts(); + /* On P9, initialize XIVE */ + init_xive(); + /* Grab centaurs from device-tree if present (only on FSP-less) */ centaur_init(); @@ -816,6 +820,9 @@ void __noreturn __secondary_cpu_entry(void) init_hid(); + /* Some XIVE setup */ + xive_cpu_callin(cpu); + /* Wait for work to do */ while(true) { int i; diff --git a/hw/Makefile.inc b/hw/Makefile.inc index a9dd9f1..9779f06 100644 --- a/hw/Makefile.inc +++ b/hw/Makefile.inc @@ -6,7 +6,7 @@ HW_OBJS += homer.o slw.o occ.o fsi-master.o centaur.o HW_OBJS += nx.o nx-rng.o nx-crypto.o nx-842.o HW_OBJS += p7ioc.o p7ioc-inits.o p7ioc-phb.o HW_OBJS += phb3.o sfc-ctrl.o fake-rtc.o bt.o p8-i2c.o prd.o -HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o +HW_OBJS += dts.o lpc-rtc.o npu.o npu-hw-procedures.o xive.o HW=hw/built-in.o include $(SRC)/hw/fsp/Makefile.inc diff --git a/hw/xive.c b/hw/xive.c new file mode 100644 index 0000000..ed30252 --- /dev/null +++ b/hw/xive.c @@ -0,0 +1,2003 @@ +/* Copyright 2016 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <skiboot.h> +#include <xscom.h> +#include <chip.h> +#include <io.h> +#include <xive.h> +#include <xscom-p9-regs.h> +#include <interrupts.h> +#include <timebase.h> + +/* Use Block group mode to move chip_id into block .... */ +#define USE_BLOCK_GROUP_MODE + +/* Indirect mode */ +#define USE_INDIRECT + +/* Always notify from EQ to VP (no EOI on EQs). Will speed up + * EOIs at the expense of potentially higher powerbus traffic. + */ +#define EQ_ALWAYS_NOTIFY + +/* Indirect VSDs are little endian (SIMICS bug ?) */ +#undef INDIRECT_IS_LE + +/* Verbose debug */ +#undef XIVE_VERBOSE_DEBUG + +/* Note on interrupt numbering: + * + * The way we represent HW interrupt numbers globaly in the system + * and in the device-tree is documented in include/interrupts.h + * + * Basically, the EAS/IVT index is the global interrupt number + */ + + +/* + * + * VSDs, blocks, set translation etc... + * + * This stuff confused me to no end so here's an attempt at explaining + * my understanding of it and how I use it in OPAL & Linux + * + * For the following data structures, the XIVE use a mechanism called + * Virtualization Structure Tables (VST) to manage the memory layout + * and access: ESBs (Event State Buffers, aka IPI sources), EAS/IVT + * (Event assignment structures), END/EQs (Notification descriptors + * aka event queues) and NVT/VPD (Notification Virtual Targets). + * + * These structures divide those tables into 16 "blocks". 
Each XIVE
+ * instance has a definition for all 16 blocks that can either represent
+ * an actual table in memory or a remote XIVE MMIO port to access a
+ * block that is owned by that remote XIVE.
+ *
+ * Our SW design will consist of allocating one block per chip (and thus
+ * per XIVE instance) for now, thus giving us up to 16 supported chips in
+ * the system. We may have to revisit that if we ever support systems with
+ * more than 16 chips, but that isn't on our radar at the moment, or if we
+ * want to do like pHyp on some machines and dedicate 2 blocks per chip
+ * for some structures.
+ *
+ * Thus we need to be careful that we never expose to Linux the concept
+ * of blocks and block boundaries, but instead we provide full number ranges
+ * so that consecutive blocks can be supported.
+ *
+ * We will pre-allocate some of the tables in order to support a "fallback"
+ * mode of operation where an old-style XICS is emulated via OPAL calls. This
+ * is achieved by having a default of one VP per physical thread associated
+ * with one EQ and one IPI. There are also enough EATs to cover all the PHBs.
+ *
+ * Similarly, for MMIO access, the BARs support what is called "set
+ * translation", which allows the BAR to be divided into a certain
+ * number of sets. The VC BAR (ESBs, ENDs, ...) supports 64 sets and
+ * the PC BAR supports 16. Each "set" can be routed to a specific
+ * block and offset within a block.
+ *
+ * For now, we will not use much of that functionality. We will use a
+ * fixed split between ESBs and ENDs for the VC BAR as defined by the
+ * constants below, and we will allocate all the PC BAR sets to the
+ * local block of that chip.
+ */
+
+
+/* BAR default values (should be initialized by HostBoot but for
+ * now we do it). Based on the memory map document by Dave Larson.
+ *
+ * Fixed IC and TM BARs first.
+ */
+/* Use 64K for everything by default */
+#define IC_PAGE_SIZE	0x10000
+#define TM_PAGE_SIZE	0x10000
+
+#define IC_BAR_DEFAULT	0x30203100000ull
+#define IC_BAR_SIZE	(8 * IC_PAGE_SIZE)
+#define TM_BAR_DEFAULT	0x30203180000ull
+#define TM_BAR_SIZE	(4 * TM_PAGE_SIZE)
+
+/* VC BAR contains set translations for the ESBs and the EQs.
+ *
+ * It's divided into 64 sets, each of which can be either ESB pages or EQ
+ * pages. The table configuring this is the EDT.
+ *
+ * Additionally, the ESB pages come in pairs if Linux_Trig_Mode isn't enabled
+ * (which we won't enable for now as it assumes write-only permission which
+ * the MMU doesn't support).
+ *
+ * To get started we just hard wire the following setup:
+ *
+ * VC_BAR size is 512G. We split it into 384G of ESBs (48 sets) and 128G
+ * of ENDs (16 sets) for the time being, ie. each set is thus 8GB.
+ */
+
+#define VC_BAR_DEFAULT	0x10000000000ull
+#define VC_BAR_SIZE	0x08000000000ull
+#define VC_ESB_SETS	48
+#define VC_END_SETS	16
+#define VC_MAX_SETS	64
+
+/* PC BAR contains the virtual processors.
+ *
+ * The table configuring the set translation (16 sets) is the VDT.
+ */
+#define PC_BAR_DEFAULT	0x18000000000ull
+#define PC_BAR_SIZE	0x01000000000ull
+#define PC_MAX_SETS	16
+
+/* XXX This is currently the top limit of the number of ESB/SBE entries
+ * and EAS/IVT entries pre-allocated per chip. This should probably
+ * turn into a device-tree property or NVRAM setting, or maybe be
+ * calculated from the amount of system RAM...
+ *
+ * This is currently set to 1M.
+ *
+ * This is independent of the sizing of the MMIO space.
+ * + * WARNING: Due to how XICS emulation works, we cannot support more + * interrupts per chip at this stage as the full interrupt number + * (block + index) has to fit in a 24-bit number. + * + * That gives us a pre-allocated space of 256KB per chip for the state + * bits and 8M per chip for the EAS/IVT. + * + * Note: The HW interrupts from PCIe and similar other entities that + * use their own state bit array will have to share that IVT space, + * so we could potentially make the IVT size twice as big, but for now + * we will simply share it and ensure we don't hand out IPIs that + * overlap the HW interrupts. + */ +#define MAX_INT_ENTRIES (1 * 1024 * 1024) + +/* Corresponding direct table sizes */ +#define SBE_SIZE (MAX_INT_ENTRIES / 4) +#define IVT_SIZE (MAX_INT_ENTRIES * 8) + +/* Max number of EQs. We allocate an indirect table big enough so + * that when fully populated we can have that many EQs. + * + * The max number of EQs we support in our MMIO space is 128G/128K + * ie. 1M. Since one EQ is 8 words (32 bytes), a 64K page can hold + * 2K EQs. We need 512 pointers, ie, 4K of memory for the indirect + * table. + * + * XXX Adjust that based on BAR value ? + */ +#ifdef USE_INDIRECT +#define MAX_EQ_COUNT (1 * 1024 * 1024) +#define EQ_PER_PAGE (0x10000 / 32) // Use sizeof ? +#define IND_EQ_TABLE_SIZE ((MAX_EQ_COUNT / EQ_PER_PAGE) * 8) +#else +#define MAX_EQ_COUNT (4 * 1024) +#define EQT_SIZE (MAX_EQ_COUNT * 32) +#endif + + +/* Max number of VPs. We allocate an indirect table big enough so + * that when fully populated we can have that many VPs. + * + * The max number of VPs we support in our MMIO space is 64G/64K + * ie. 1M. Since one VP is 16 words (64 bytes), a 64K page can hold + * 1K EQ. We need 1024 pointers, ie, 8K of memory for the indirect + * table. + * + * HOWEVER: A block supports only up to 512K VPs (19 bits of target + * in the EQ). Since we currently only support 1 block per chip, + * we will allocate half of the above. We might add support for + * 2 blocks per chip later if necessary. + * + * XXX Adjust that based on BAR value ? + */ +#ifdef USE_INDIRECT +#define MAX_VP_COUNT (512 * 1024) +#define VP_PER_PAGE (0x10000 / 64) // Use sizeof ? +#define IND_VP_TABLE_SIZE ((MAX_VP_COUNT / VP_PER_PAGE) * 8) +#else +#define MAX_VP_COUNT (4 * 1024) +#define VPT_SIZE (MAX_VP_COUNT * 64) +#endif + +#ifdef USE_BLOCK_GROUP_MODE + +/* Initial number of VPs (XXX Make it a variable ?). Round things + * up to a max of 32 cores per chip + */ +#define INITIAL_VP_BASE 0x80 +#define INITIAL_VP_COUNT 0x80 + +#else + +/* Initial number of VPs on block 0 only */ +#define INITIAL_BLK0_VP_BASE 0x800 +#define INITIAL_BLK0_VP_COUNT (2 * 1024) + +#endif + +struct xive { + uint32_t chip_id; + struct dt_node *x_node; + struct dt_node *m_node; + + uint64_t xscom_base; + + /* MMIO regions */ + void *ic_base; + uint64_t ic_size; + uint32_t ic_shift; + void *tm_base; + uint64_t tm_size; + uint32_t tm_shift; + void *pc_base; + uint64_t pc_size; + void *vc_base; + uint64_t vc_size; + + void *esb_mmio; + void *eq_mmio; + + /* Set on XSCOM register access error */ + bool last_reg_error; + + /* Per-XIVE mutex */ + struct lock lock; + + /* Pre-allocated tables. + * + * We setup all the VDS for actual tables (ie, by opposition to + * forwarding ports) as either direct pre-allocated or indirect + * and partially populated. + * + * Currently, the ESB/SBE and the EAS/IVT tables are direct and + * fully pre-allocated based on MAX_INT_ENTRIES. 
+ *
+ * The other tables are indirect; we thus pre-allocate the indirect
+ * table (ie, pages of pointers) and populate enough of the pages
+ * for our basic setup using 64K pages.
+ *
+ * The size of the indirect tables is driven by MAX_VP_COUNT and
+ * MAX_EQ_COUNT. The number of pre-allocated ones is driven by
+ * INITIAL_VP_COUNT (the number of EQs depends on the number of VPs)
+ * in block group mode; otherwise we only preallocate
+ * INITIAL_BLK0_VP_COUNT on block 0.
+ */
+
+	/* Direct SBE and IVT tables */
+	void *sbe_base;
+	void *ivt_base;
+
+#ifdef USE_INDIRECT
+	/* Indirect END/EQ table. NULL entries are unallocated, count is
+	 * the number of pointers (ie, sub-page placeholders). alloc_count
+	 * is the number of sub-pages that have been pre-allocated (and
+	 * thus whose memory is owned by OPAL).
+	 */
+	uint64_t *eq_ind_base;
+	uint32_t eq_ind_count;
+	uint32_t eq_alloc_count;
+#else
+	void *eq_base;
+#endif
+
+#ifdef USE_INDIRECT
+	/* Indirect NVT/VP table. NULL entries are unallocated, count is
+	 * the number of pointers (ie, sub-page placeholders).
+	 */
+	uint64_t *vp_ind_base;
+	uint64_t vp_ind_count;
+#else
+	void *vp_base;
+#endif
+	/* To ease a possible change to supporting more than one block of
+	 * interrupts per chip, we store here the "base" global number
+	 * and max number of interrupts for this chip. The global number
+	 * encompasses the block number and index.
+	 */
+	uint32_t int_base;
+	uint32_t int_max;
+
+	/* Due to the overlap between IPIs and HW sources in the IVT table,
+	 * we keep some kind of top-down allocator. It is used for HW sources
+	 * to "allocate" interrupt entries and will limit what can be handed
+	 * out as IPIs. Of course this assumes we "allocate" all HW sources
+	 * before we start handing out IPIs.
+	 *
+	 * Note: The numbers here are global interrupt numbers so that we can
+	 * potentially handle more than one block per chip in the future.
+	 */
+	uint32_t int_hw_bot;	/* Bottom of HW allocation */
+	uint32_t int_ipi_top;	/* Highest IPI handed out so far */
+};
+
+/* Conversion between GIRQ and block/index.
+ *
+ * ------------------------------------
+ * |00000000|BLOC|       INDEX        |
+ * ------------------------------------
+ *      8     4           20
+ *
+ * The global interrupt number is thus limited to 24 bits, which is
+ * necessary for our XICS emulation since the top 8 bits are
+ * reserved for the CPPR value.
+ *
+ */
+#define GIRQ_TO_BLK(__g)	(((__g) >> 20) & 0xf)
+#define GIRQ_TO_IDX(__g)	((__g) & 0x000fffff)
+#define BLKIDX_TO_GIRQ(__b,__i)	(((uint32_t)(__b)) << 20 | (__i))
+
+/* VP IDs are just the concatenation of the BLK and index, as found
+ * in an EQ target field for example.
+ */
+
+/* For now, it's one chip per block for both VC and PC */
+#define PC_BLK_TO_CHIP(__b)	(__b)
+#define VC_BLK_TO_CHIP(__b)	(__b)
+#define GIRQ_TO_CHIP(__isn)	(VC_BLK_TO_CHIP(GIRQ_TO_BLK(__isn)))
+
+/* Routing of physical processors to VPs */
+#ifdef USE_BLOCK_GROUP_MODE
+#define PIR2VP_IDX(__pir)	(0x80 | P9_PIR2LOCALCPU(__pir))
+#define PIR2VP_BLK(__pir)	(P9_PIR2GCID(__pir))
+#define VP2PIR(__blk, __idx)	(P9_PIRFROMLOCALCPU(VC_BLK_TO_CHIP(__blk), (__idx) & 0x7f))
+#else
+#define PIR2VP_IDX(__pir)	(0x800 | (P9_PIR2GCID(__pir) << 7) | P9_PIR2LOCALCPU(__pir))
+#define PIR2VP_BLK(__pir)	(0)
+#define VP2PIR(__blk, __idx)	(P9_PIRFROMLOCALCPU(((__idx) >> 7) & 0xf, (__idx) & 0x7f))
+#endif
+
+#define xive_regw(__x, __r, __v) \
+	__xive_regw(__x, __r, X_##__r, __v, #__r)
+#define xive_regr(__x, __r) \
+	__xive_regr(__x, __r, X_##__r, #__r)
+#define xive_regwx(__x, __r, __v) \
+	__xive_regw(__x, 0, X_##__r, __v, #__r)
+#define xive_regrx(__x, __r) \
+	__xive_regr(__x, 0, X_##__r, #__r)
+
+#ifdef XIVE_VERBOSE_DEBUG
+#define xive_vdbg(__x,__fmt,...)	prlog(PR_DEBUG,"XIVE[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
+#define xive_cpu_vdbg(__c,__fmt,...)	prlog(PR_DEBUG,"XIVE[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
+#else
+#define xive_vdbg(x,fmt,...)		do { } while(0)
+#define xive_cpu_vdbg(x,fmt,...)	do { } while(0)
+#endif
+
+#define xive_dbg(__x,__fmt,...)		prlog(PR_DEBUG,"XIVE[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
+#define xive_cpu_dbg(__c,__fmt,...)	prlog(PR_DEBUG,"XIVE[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
+#define xive_warn(__x,__fmt,...)	prlog(PR_WARNING,"XIVE[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
+#define xive_cpu_warn(__c,__fmt,...)	prlog(PR_WARNING,"XIVE[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
+#define xive_err(__x,__fmt,...)		prlog(PR_ERR,"XIVE[ IC %02x ] " __fmt, (__x)->chip_id, ##__VA_ARGS__)
+#define xive_cpu_err(__c,__fmt,...)	prlog(PR_ERR,"XIVE[CPU %04x] " __fmt, (__c)->pir, ##__VA_ARGS__)
+
+static void __xive_regw(struct xive *x, uint32_t m_reg, uint32_t x_reg, uint64_t v,
+			const char *rname)
+{
+	bool use_xscom = (m_reg == 0) || !x->ic_base;
+	int64_t rc;
+
+	x->last_reg_error = false;
+
+	if (use_xscom) {
+		assert(x_reg != 0);
+		rc = xscom_write(x->chip_id, x->xscom_base + x_reg, v);
+		if (rc) {
+			if (!rname)
+				rname = "???";
+			xive_err(x, "Error writing register %s\n", rname);
+			/* Anything else we can do here ? */
+			x->last_reg_error = true;
+		}
+	} else {
+		out_be64(x->ic_base + m_reg, v);
+	}
+}
+
+static uint64_t __xive_regr(struct xive *x, uint32_t m_reg, uint32_t x_reg,
+			    const char *rname)
+{
+	bool use_xscom = (m_reg == 0) || !x->ic_base;
+	int64_t rc;
+	uint64_t val;
+
+	x->last_reg_error = false;
+
+	if (use_xscom) {
+		rc = xscom_read(x->chip_id, x->xscom_base + x_reg, &val);
+		if (rc) {
+			if (!rname)
+				rname = "???";
+			xive_err(x, "Error reading register %s\n", rname);
+			/* Anything else we can do here ?
*/ + x->last_reg_error = true; + return -1ull; + } + } else { + val = in_be64(x->ic_base + m_reg); + } + return val; +} + +/* Locate a controller from an IRQ number */ +static struct xive *xive_from_isn(uint32_t isn) +{ + uint32_t chip_id = GIRQ_TO_CHIP(isn); + struct proc_chip *c = get_chip(chip_id); + + if (!c) + return NULL; + return c->xive; +} + +/* +static struct xive *xive_from_pc_blk(uint32_t blk) +{ + uint32_t chip_id = PC_BLK_TO_CHIP(blk); + struct proc_chip *c = get_chip(chip_id); + + if (!c) + return NULL; + return c->xive; +} +*/ + +static struct xive *xive_from_vc_blk(uint32_t blk) +{ + uint32_t chip_id = VC_BLK_TO_CHIP(blk); + struct proc_chip *c = get_chip(chip_id); + + if (!c) + return NULL; + return c->xive; +} + +static struct xive_ive *xive_get_ive(struct xive *x, unsigned int isn) +{ + struct xive_ive *ivt; + uint32_t idx = GIRQ_TO_IDX(isn); + + /* Check the block matches */ + if (isn < x->int_base || isn >= x->int_max) { + xive_err(x, "xive_get_ive, ISN 0x%x not on chip\n", idx); + return NULL; + } + assert (idx < MAX_INT_ENTRIES); + + /* XXX If we support >1 block per chip, fix this */ + ivt = x->ivt_base; + assert(ivt); + + // XXX DBG + if (ivt[idx].w != 0) + xive_vdbg(x, "xive_get_ive(isn %x), idx=0x%x IVE=%016llx\n", + isn, idx, ivt[idx].w); + + return ivt + idx; +} + +static struct xive_eq *xive_get_eq(struct xive *x, unsigned int idx) +{ + struct xive_eq *p; + +#ifdef USE_INDIRECT + if (idx >= (x->eq_ind_count * EQ_PER_PAGE)) + return NULL; +#ifdef INDIRECT_IS_LE + p = (struct xive_eq *)(le64_to_cpu(x->eq_ind_base[idx / EQ_PER_PAGE]) & + VSD_ADDRESS_MASK); +#else + p = (struct xive_eq *)(x->eq_ind_base[idx / EQ_PER_PAGE] & + VSD_ADDRESS_MASK); +#endif + if (!p) + return NULL; + + return &p[idx % EQ_PER_PAGE]; +#else + if (idx >= MAX_EQ_COUNT) + return NULL; + if (!x->eq_base) + return NULL; + p = x->eq_base; + return p + idx; +#endif +} + +static struct xive_vp *xive_get_vp(struct xive *x, unsigned int idx) +{ + struct xive_vp *p; + +#ifdef USE_INDIRECT + assert(idx < (x->vp_ind_count * VP_PER_PAGE)); +#ifdef INDIRECT_IS_LE + p = (struct xive_vp *)(le64_to_cpu(x->vp_ind_base[idx / VP_PER_PAGE]) & + VSD_ADDRESS_MASK); +#else + p = (struct xive_vp *)(x->vp_ind_base[idx / VP_PER_PAGE] & + VSD_ADDRESS_MASK); +#endif + assert(p); + + return &p[idx % VP_PER_PAGE]; +#else + assert(idx < MAX_VP_COUNT); + p = x->vp_base; + return p + idx; +#endif +} + +static void xive_init_vp(struct xive *x __unused, struct xive_vp *vp __unused) +{ + /* XXX TODO: Look at the special cache line stuff */ + vp->w0 = VP_W0_VALID; +} + +static void xive_init_eq(struct xive *x __unused, uint32_t vp_idx, + struct xive_eq *eq, void *backing_page) +{ + eq->w1 = EQ_W1_GENERATION; + eq->w3 = ((uint64_t)backing_page) & 0xffffffff; + eq->w2 = (((uint64_t)backing_page)) >> 32 & 0x0fffffff; + // IS this right ? Are we limited to 2K VPs per block ? */ + eq->w6 = SETFIELD(EQ_W6_NVT_BLOCK, 0ul, x->chip_id) | + SETFIELD(EQ_W6_NVT_INDEX, 0ul, vp_idx); + eq->w7 = SETFIELD(EQ_W7_F0_PRIORITY, 0ul, 0x07); + eieio(); + eq->w0 = EQ_W0_VALID | EQ_W0_ENQUEUE | + SETFIELD(EQ_W0_QSIZE, 0ul, EQ_QSIZE_64K); +#ifdef EQ_ALWAYS_NOTIFY + eq->w0 |= EQ_W0_UCOND_NOTIFY; +#endif +} + +static uint32_t *xive_get_eq_buf(struct xive *x, uint32_t eq_blk __unused, + uint32_t eq_idx) +{ + struct xive_eq *eq = xive_get_eq(x, eq_idx); + uint64_t addr; + + assert(eq); + assert(eq->w0 & EQ_W0_VALID); + addr = (((uint64_t)eq->w2) & 0x0fffffff) << 32 | eq->w3; + + return (uint32_t *)addr; +} + +#if 0 /* Not used yet. 
This will be used to kill the cache + * of indirect VSDs + */ +static int64_t xive_vc_ind_cache_kill(struct xive *x, uint64_t type, + uint64_t block, uint64_t idx) +{ + uint64_t val; + + xive_regw(x, VC_AT_MACRO_KILL_MASK, + SETFIELD(VC_KILL_BLOCK_ID, 0ull, -1ull) | + SETFIELD(VC_KILL_OFFSET, 0ull, -1ull)); + xive_regw(x, VC_AT_MACRO_KILL, VC_KILL_VALID | + SETFIELD(VC_KILL_TYPE, 0ull, type) | + SETFIELD(VC_KILL_BLOCK_ID, 0ull, block) | + SETFIELD(VC_KILL_OFFSET, 0ull, idx)); + + /* XXX SIMICS problem ? */ + if (chip_quirk(QUIRK_SIMICS)) + return 0; + + /* XXX Add timeout */ + for (;;) { + val = xive_regr(x, VC_AT_MACRO_KILL); + if (!(val & VC_KILL_VALID)) + break; + } + return 0; +} +#endif + +enum xive_cache_type { + xive_cache_ivc, + xive_cache_sbc, + xive_cache_eqc, + xive_cache_vpc, +}; + +static int64_t __xive_cache_scrub(struct xive *x, enum xive_cache_type ctype, + uint64_t block, uint64_t idx, + bool want_inval, bool want_disable) +{ + uint64_t sreg, sregx, mreg, mregx; + uint64_t mval, sval; + + switch (ctype) { + case xive_cache_ivc: + sreg = VC_IVC_SCRUB_TRIG; + sregx = X_VC_IVC_SCRUB_TRIG; + mreg = VC_IVC_SCRUB_MASK; + mregx = X_VC_IVC_SCRUB_MASK; + break; + case xive_cache_sbc: + sreg = VC_SBC_SCRUB_TRIG; + sregx = X_VC_SBC_SCRUB_TRIG; + mreg = VC_SBC_SCRUB_MASK; + mregx = X_VC_SBC_SCRUB_MASK; + break; + case xive_cache_eqc: + sreg = VC_EQC_SCRUB_TRIG; + sregx = X_VC_EQC_SCRUB_TRIG; + mreg = VC_EQC_SCRUB_MASK; + mregx = X_VC_EQC_SCRUB_MASK; + break; + case xive_cache_vpc: + sreg = PC_VPC_SCRUB_TRIG; + sregx = X_PC_VPC_SCRUB_TRIG; + mreg = PC_VPC_SCRUB_MASK; + mregx = X_PC_VPC_SCRUB_MASK; + break; + } + if (ctype == xive_cache_vpc) { + mval = PC_SCRUB_BLOCK_ID | PC_SCRUB_OFFSET; + sval = SETFIELD(PC_SCRUB_BLOCK_ID, idx, block) | + PC_SCRUB_VALID; + } else { + mval = VC_SCRUB_BLOCK_ID | VC_SCRUB_OFFSET; + sval = SETFIELD(VC_SCRUB_BLOCK_ID, idx, block) | + VC_SCRUB_VALID; + } + if (want_inval) + sval |= PC_SCRUB_WANT_INVAL; + if (want_disable) + sval |= PC_SCRUB_WANT_DISABLE; + + __xive_regw(x, mreg, mregx, mval, NULL); + __xive_regw(x, sreg, sregx, sval, NULL); + + /* XXX Add timeout !!! */ + for (;;) { + sval = __xive_regr(x, sreg, sregx, NULL); + if (!(sval & VC_SCRUB_VALID)) + break; + time_wait_us(1); + } + return 0; +} + +static int64_t xive_ivc_scrub(struct xive *x, uint64_t block, uint64_t idx) +{ + return __xive_cache_scrub(x, xive_cache_ivc, block, idx, false, false); +} + +static void xive_ipi_init(struct xive *x, uint32_t idx) +{ + uint8_t *mm = x->esb_mmio + idx * 0x20000; + + /* Clear P and Q */ + in_8(mm + 0x10c00); +} + +static void xive_ipi_eoi(struct xive *x, uint32_t idx) +{ + uint8_t *mm = x->esb_mmio + idx * 0x20000; + uint8_t eoi_val; + + /* For EOI, we use the special MMIO that does a clear of both + * P and Q and returns the old Q. 
+ * + * This allows us to then do a re-trigger if Q was set rather + * than synthetizing an interrupt in software + */ + eoi_val = in_8(mm + 0x10c00); + if (eoi_val & 1) { + out_8(mm, 0); + } +} + +static void xive_ipi_trigger(struct xive *x, uint32_t idx) +{ + uint8_t *mm = x->esb_mmio + idx * 0x20000; + + xive_vdbg(x, "Trigger IPI 0x%x\n", idx); + + out_8(mm, 0); +} + + +static bool xive_set_vsd(struct xive *x, uint32_t tbl, uint32_t idx, uint64_t v) +{ + /* Set VC version */ + xive_regw(x, VC_VSD_TABLE_ADDR, + SETFIELD(VST_TABLE_SELECT, 0ull, tbl) | + SETFIELD(VST_TABLE_OFFSET, 0ull, idx)); + if (x->last_reg_error) + return false; + xive_regw(x, VC_VSD_TABLE_DATA, v); + if (x->last_reg_error) + return false; + + /* Except for IRQ table, also set PC version */ + if (tbl == VST_TSEL_IRQ) + return true; + + xive_regw(x, PC_VSD_TABLE_ADDR, + SETFIELD(VST_TABLE_SELECT, 0ull, tbl) | + SETFIELD(VST_TABLE_OFFSET, 0ull, idx)); + if (x->last_reg_error) + return false; + xive_regw(x, PC_VSD_TABLE_DATA, v); + if (x->last_reg_error) + return false; + return true; +} + +static bool xive_set_local_tables(struct xive *x) +{ + uint64_t base; + + /* These have to be power of 2 sized */ + assert(is_pow2(SBE_SIZE)); + assert(is_pow2(IVT_SIZE)); + + /* All tables set as exclusive */ + base = SETFIELD(VSD_MODE, 0ull, VSD_MODE_EXCLUSIVE); + + /* Set IVT as direct mode */ + if (!xive_set_vsd(x, VST_TSEL_IVT, x->chip_id, base | + (((uint64_t)x->ivt_base) & VSD_ADDRESS_MASK) | + SETFIELD(VSD_TSIZE, 0ull, ilog2(IVT_SIZE) - 12))) + return false; + + /* Set SBE as direct mode */ + if (!xive_set_vsd(x, VST_TSEL_SBE, x->chip_id, base | + (((uint64_t)x->sbe_base) & VSD_ADDRESS_MASK) | + SETFIELD(VSD_TSIZE, 0ull, ilog2(SBE_SIZE) - 12))) + return false; + +#ifdef USE_INDIRECT + /* Set EQDT as indirect mode with 64K subpages */ + if (!xive_set_vsd(x, VST_TSEL_EQDT, x->chip_id, base | + (((uint64_t)x->eq_ind_base) & VSD_ADDRESS_MASK) | + VSD_INDIRECT | SETFIELD(VSD_TSIZE, 0ull, 4))) + return false; + + /* Set VPDT as indirect mode with 64K subpages */ + if (!xive_set_vsd(x, VST_TSEL_VPDT, x->chip_id, base | + (((uint64_t)x->vp_ind_base) & VSD_ADDRESS_MASK) | + VSD_INDIRECT | SETFIELD(VSD_TSIZE, 0ull, 4))) + return false; +#else + /* Set EQDT as direct mode */ + if (!xive_set_vsd(x, VST_TSEL_EQDT, x->chip_id, base | + (((uint64_t)x->eq_base) & VSD_ADDRESS_MASK) | + SETFIELD(VSD_TSIZE, 0ull, ilog2(EQT_SIZE) - 12))) + return false; + + /* Set VPDT as direct mode */ + if (!xive_set_vsd(x, VST_TSEL_VPDT, x->chip_id, base | + (((uint64_t)x->vp_base) & VSD_ADDRESS_MASK) | + SETFIELD(VSD_TSIZE, 0ull, ilog2(VPT_SIZE) - 12))) + return false; +#endif + + return true; +} + +static bool xive_read_bars(struct xive *x) +{ + uint64_t bar, msk; + + /* Read IC BAR */ + bar = xive_regrx(x, CQ_IC_BAR); + if (bar & CQ_IC_BAR_64K) + x->ic_shift = 16; + else + x->ic_shift = 12; + x->ic_size = 8ul << x->ic_shift; + x->ic_base = (void *)(bar & 0x00ffffffffffffffull); + + /* Read TM BAR */ + bar = xive_regrx(x, CQ_TM1_BAR); + assert(bar & CQ_TM_BAR_VALID); + if (bar & CQ_TM_BAR_64K) + x->tm_shift = 16; + else + x->tm_shift = 12; + x->tm_size = 4ul << x->tm_shift; + x->tm_base = (void *)(bar & 0x00ffffffffffffffull); + + /* Read PC BAR */ + bar = xive_regr(x, CQ_PC_BAR); + msk = xive_regr(x, CQ_PC_BARM) | 0xffffffc000000000ul; + assert(bar & CQ_PC_BAR_VALID); + x->pc_size = (~msk) + 1; + x->pc_base = (void *)(bar & 0x00ffffffffffffffull); + + /* Read VC BAR */ + bar = xive_regr(x, CQ_VC_BAR); + msk = xive_regr(x, CQ_VC_BARM) | 0xfffff80000000000ul; 
+ assert(bar & CQ_VC_BAR_VALID); + x->vc_size = (~msk) + 1; + x->vc_base = (void *)(bar & 0x00ffffffffffffffull); + + return true; +} + +static bool xive_configure_bars(struct xive *x) +{ + uint64_t mmio_base, chip_base, val; + + /* Calculate MMIO base offset for that chip */ + mmio_base = 0x006000000000000ull; + chip_base = mmio_base | (0x40000000000ull * (uint64_t)x->chip_id); + + /* IC BAR. We use 4K pages here, 64K doesn't seem implemented + * in SIMCIS + */ + x->ic_base = (void *)(chip_base | IC_BAR_DEFAULT); + x->ic_size = IC_BAR_SIZE; + val = (uint64_t)x->ic_base | CQ_IC_BAR_VALID; + if (IC_PAGE_SIZE == 0x10000) { + val |= CQ_IC_BAR_64K; + x->ic_shift = 16; + } else + x->ic_shift = 12; + xive_regwx(x, CQ_IC_BAR, val); + if (x->last_reg_error) + return false; + + /* TM BAR, only configure TM1. Note that this has the same address + * for each chip !!! + */ + x->tm_base = (void *)(mmio_base | TM_BAR_DEFAULT); + x->tm_size = TM_BAR_SIZE; + val = (uint64_t)x->tm_base | CQ_TM_BAR_VALID; + if (TM_PAGE_SIZE == 0x10000) { + x->tm_shift = 16; + val |= CQ_TM_BAR_64K; + } else + x->tm_shift = 12; + xive_regwx(x, CQ_TM1_BAR, val); + if (x->last_reg_error) + return false; + xive_regwx(x, CQ_TM2_BAR, 0); + if (x->last_reg_error) + return false; + + /* PC BAR. Clear first, write mask, then write value */ + x->pc_base = (void *)(chip_base | PC_BAR_DEFAULT); + x->pc_size = PC_BAR_SIZE; + xive_regwx(x, CQ_PC_BAR, 0); + if (x->last_reg_error) + return false; + val = ~(PC_BAR_SIZE - 1) & CQ_PC_BARM_MASK; + xive_regwx(x, CQ_PC_BARM, val); + if (x->last_reg_error) + return false; + val = (uint64_t)x->pc_base | CQ_PC_BAR_VALID; + xive_regwx(x, CQ_PC_BAR, val); + if (x->last_reg_error) + return false; + + /* VC BAR. Clear first, write mask, then write value */ + x->vc_base = (void *)(chip_base | VC_BAR_DEFAULT); + x->vc_size = VC_BAR_SIZE; + xive_regwx(x, CQ_VC_BAR, 0); + if (x->last_reg_error) + return false; + val = ~(VC_BAR_SIZE - 1) & CQ_VC_BARM_MASK; + xive_regwx(x, CQ_VC_BARM, val); + if (x->last_reg_error) + return false; + val = (uint64_t)x->vc_base | CQ_VC_BAR_VALID; + xive_regwx(x, CQ_VC_BAR, val); + if (x->last_reg_error) + return false; + + return true; +} + +static void xive_dump_mmio(struct xive *x) +{ + prlog(PR_DEBUG, " CQ_CFG_PB_GEN = %016llx\n", + in_be64(x->ic_base + CQ_CFG_PB_GEN)); + prlog(PR_DEBUG, " CQ_MSGSND = %016llx\n", + in_be64(x->ic_base + CQ_MSGSND)); +} + +static bool xive_check_update_bars(struct xive *x) +{ + uint64_t val; + bool force_assign; + + /* Check if IC BAR is enabled */ + val = xive_regrx(x, CQ_IC_BAR); + if (x->last_reg_error) + return false; + + /* Check if device-tree tells us to force-assign the BARs */ + force_assign = dt_has_node_property(x->x_node, + "force-assign-bars", NULL); + if ((val & CQ_IC_BAR_VALID) && !force_assign) { + xive_dbg(x, "IC BAR valid, using existing values\n"); + if (!xive_read_bars(x)) + return false; + } else { + xive_warn(x, "IC BAR invalid, reconfiguring\n"); + if (!xive_configure_bars(x)) + return false; + } + + /* Calculate some MMIO bases in the VC BAR */ + x->esb_mmio = x->vc_base; + x->eq_mmio = x->vc_base + (x->vc_size / VC_MAX_SETS) * VC_ESB_SETS; + + /* Print things out */ + xive_dbg(x, "IC: %14p [0x%012llx/%d]\n", x->ic_base, x->ic_size, x->ic_shift); + xive_dbg(x, "TM: %14p [0x%012llx/%d]\n", x->tm_base, x->tm_size, x->tm_shift); + xive_dbg(x, "PC: %14p [0x%012llx]\n", x->pc_base, x->pc_size); + xive_dbg(x, "VC: %14p [0x%012llx]\n", x->vc_base, x->vc_size); + + return true; +} + +static bool xive_config_init(struct xive *x) 
+{ + uint64_t val __unused; + + /* Configure PC and VC page sizes and disable Linux trigger mode */ + xive_regwx(x, CQ_PBI_CTL, CQ_PBI_PC_64K | CQ_PBI_VC_64K); + if (x->last_reg_error) + return false; + + /*** The rest can use MMIO ***/ + +#ifdef USE_INDIRECT + /* Enable indirect mode in VC config */ + val = xive_regr(x, VC_GLOBAL_CONFIG); + val |= VC_GCONF_INDIRECT; + xive_regw(x, VC_GLOBAL_CONFIG, val); + + /* Enable indirect mode in PC config */ + val = xive_regr(x, PC_GLOBAL_CONFIG); + val |= PC_GCONF_INDIRECT; + xive_regw(x, PC_GLOBAL_CONFIG, val); +#endif + +#ifdef USE_BLOCK_GROUP_MODE + val = xive_regr(x, PC_TCTXT_CFG); + val |= PC_TCTXT_CFG_BLKGRP_EN | PC_TCTXT_CFG_HARD_CHIPID_BLK; + xive_regw(x, PC_TCTXT_CFG, val); +#endif + return true; +} + +static bool xive_setup_set_xlate(struct xive *x) +{ + unsigned int i; + + /* Configure EDT for ESBs (aka IPIs) */ + xive_regw(x, CQ_TAR, CQ_TAR_TBL_AUTOINC | CQ_TAR_TSEL_EDT); + if (x->last_reg_error) + return false; + for (i = 0; i < VC_ESB_SETS; i++) { + xive_regw(x, CQ_TDR, + /* IPI type */ + (1ull << 62) | + /* block is chip_ID */ + (((uint64_t)x->chip_id) << 48) | + /* offset */ + (((uint64_t)i) << 32)); + if (x->last_reg_error) + return false; + } + + /* Configure EDT for ENDs (aka EQs) */ + for (i = 0; i < VC_END_SETS; i++) { + xive_regw(x, CQ_TDR, + /* EQ type */ + (2ull << 62) | + /* block is chip_ID */ + (((uint64_t)x->chip_id) << 48) | + /* offset */ + (((uint64_t)i) << 32)); + if (x->last_reg_error) + return false; + } + + /* Configure VDT */ + xive_regw(x, CQ_TAR, CQ_TAR_TBL_AUTOINC | CQ_TAR_TSEL_VDT); + if (x->last_reg_error) + return false; + for (i = 0; i < PC_MAX_SETS; i++) { + xive_regw(x, CQ_TDR, + /* Valid bit */ + (1ull << 63) | + /* block is chip_ID */ + (((uint64_t)x->chip_id) << 48) | + /* offset */ + (((uint64_t)i) << 32)); + if (x->last_reg_error) + return false; + } + return true; +} + +static struct xive_vp *xive_alloc_init_vp(struct xive *x, unsigned int idx) +{ + struct xive_vp *vp = xive_get_vp(x, idx); + struct xive_eq *eq = xive_get_eq(x, idx); + void *p; + + assert(vp); + assert(eq); + + xive_init_vp(x, vp); + + p = local_alloc(x->chip_id, 0x10000, 0x10000); + if (!p) { + xive_err(x, "Failed to allocate EQ backing store\n"); + return NULL; + } + xive_init_eq(x, idx, eq, p); + + return vp; +} + +static bool xive_prealloc_tables(struct xive *x) +{ + unsigned int i, vp_init_count, vp_init_base; + unsigned int pbase __unused, pend __unused; + uint64_t al __unused; + + /* ESB/SBE has 4 entries per byte */ + x->sbe_base = local_alloc(x->chip_id, SBE_SIZE, SBE_SIZE); + if (!x->sbe_base) { + xive_err(x, "Failed to allocate SBE\n"); + return false; + } + /* SBEs are initialized to 0b01 which corresponds to "ints off" */ + memset(x->sbe_base, 0x55, SBE_SIZE); + + /* EAS/IVT entries are 8 bytes */ + x->ivt_base = local_alloc(x->chip_id, IVT_SIZE, IVT_SIZE); + if (!x->ivt_base) { + xive_err(x, "Failed to allocate IVT\n"); + return false; + } + /* We clear the entries (non-valid). They will be initialized + * when actually used + */ + memset(x->ivt_base, 0, IVT_SIZE); + +#ifdef USE_INDIRECT + /* Indirect EQ table. (XXX Align to 64K until I figure out the + * HW requirements) + */ + al = (IND_EQ_TABLE_SIZE + 0xffff) & ~0xffffull; + x->eq_ind_base = local_alloc(x->chip_id, al, al); + if (!x->eq_ind_base) { + xive_err(x, "Failed to allocate EQ indirect table\n"); + return false; + } + memset(x->eq_ind_base, 0, al); + x->eq_ind_count = IND_EQ_TABLE_SIZE / 8; + + /* Indirect VP table. 
(XXX Align to 64K until I figure out the + * HW requirements) + */ + al = (IND_VP_TABLE_SIZE + 0xffff) & ~0xffffull; + x->vp_ind_base = local_alloc(x->chip_id, al, al); + if (!x->vp_ind_base) { + xive_err(x, "Failed to allocate VP indirect table\n"); + return false; + } + x->vp_ind_count = IND_VP_TABLE_SIZE / 8; + memset(x->vp_ind_base, 0, al); + +#else /* USE_INDIRECT */ + + x->eq_base = local_alloc(x->chip_id, EQT_SIZE, EQT_SIZE); + if (!x->eq_base) { + xive_err(x, "Failed to allocate EQ table\n"); + return false; + } + memset(x->eq_base, 0, EQT_SIZE); + + /* EAS/IVT entries are 8 bytes */ + x->vp_base = local_alloc(x->chip_id, VPT_SIZE, VPT_SIZE); + if (!x->vp_base) { + xive_err(x, "Failed to allocate VP table\n"); + return false; + } + /* We clear the entries (non-valid). They will be initialized + * when actually used + */ + memset(x->vp_base, 0, VPT_SIZE); + +#endif /* USE_INDIRECT */ + + /* Populate/initialize VP/EQs */ +#ifdef USE_BLOCK_GROUP_MODE + vp_init_count = INITIAL_VP_COUNT; + vp_init_base = INITIAL_VP_BASE; +#else + vp_init_count = x->chip_id == 0 ? INITIAL_BLK0_VP_COUNT : 0; + vp_init_base = INITIAL_BLK0_VP_BASE; +#endif + +#ifdef USE_INDIRECT + /* Allocate pages for some VPs and EQs in indirect mode */ + pbase = vp_init_base / VP_PER_PAGE; + pend = (vp_init_base + vp_init_count) / VP_PER_PAGE; + xive_dbg(x, "Allocating pages %d to %d of VPs (for %d VPs)\n", + pbase, pend, INITIAL_VP_COUNT); + for (i = pbase; i <= pend; i++) { + void *page; + + /* Indirect entries have a VSD format */ + page = local_alloc(x->chip_id, 0x10000, 0x10000); + if (!page) { + xive_err(x, "Failed to allocate VP page\n"); + return false; + } + memset(page, 0, 0x10000); + x->vp_ind_base[i] = ((uint64_t)page) & VSD_ADDRESS_MASK; + x->vp_ind_base[i] |= SETFIELD(VSD_TSIZE, 0ull, 4); + + page = local_alloc(x->chip_id, 0x10000, 0x10000); + if (!page) { + xive_err(x, "Failed to allocate EQ page\n"); + return false; + } + memset(page, 0, 0x10000); + x->eq_ind_base[i] = ((uint64_t)page) & VSD_ADDRESS_MASK; + x->eq_ind_base[i] |= SETFIELD(VSD_TSIZE, 0ull, 4); + +#ifdef INDIRECT_IS_LE + x->vp_ind_base[i] = cpu_to_le64(x->vp_ind_base[i]); + x->eq_ind_base[i] = cpu_to_le64(x->eq_ind_base[i]); +#endif + } +#endif /* USE_INDIRECT */ + + /* Allocate the initial EQs backing store and initialize EQs and VPs */ + for (i = vp_init_base; i < (vp_init_base + vp_init_count); i++) + if (xive_alloc_init_vp(x, i) == NULL) { + xive_err(x, "Base VP initialization failed\n"); + return false; + } + + return true; +} + +static void xive_create_mmio_dt_node(struct xive *x) +{ + x->m_node = dt_new_addr(dt_root, "interrupt-controller", + (uint64_t)x->ic_base); + assert(x->m_node); + + dt_add_property_u64s(x->m_node, "reg", + (uint64_t)x->ic_base, x->ic_size, + (uint64_t)x->tm_base, x->tm_size, + (uint64_t)x->pc_base, x->pc_size, + (uint64_t)x->vc_base, x->vc_size); + + /* XXX Only put in "ibm,power9-xive" when we support the exploitation + * related APIs and properties + */ + dt_add_property_strings(x->m_node, "compatible", /*"ibm,power9-xive",*/ "ibm,opal-intc"); + + dt_add_property_cells(x->m_node, "ibm,xive-max-sources", + MAX_INT_ENTRIES); +} + +static void late_init_one_xive(struct xive *x __unused) +{ + // XXX Setup fwd ports +} + +uint32_t xive_alloc_hw_irqs(uint32_t chip_id, uint32_t count, uint32_t align) +{ + struct proc_chip *chip = get_chip(chip_id); + struct xive *x; + uint32_t base, i; + + assert(chip); + assert(is_pow2(align)); + + x = chip->xive; + assert(x); + + /* Allocate the HW interrupts */ + base = 
x->int_hw_bot - count; + base &= ~(align - 1); + if (base < x->int_ipi_top) { + xive_err(x, + "HW alloc request for %d interrupts aligned to %d failed\n", + count, align); + return XIVE_IRQ_ERROR; + } + x->int_hw_bot = base; + + /* Initialize the corresponding IVT entries to sane defaults, + * IE entry is valid, not routed and masked, EQ data is set + * to the GIRQ number. + */ + for (i = 0; i < count; i++) { + struct xive_ive *ive = xive_get_ive(x, base + i); + + ive->w = IVE_VALID | IVE_MASKED | SETFIELD(IVE_EQ_DATA, 0ul, base + i); + } + return base; +} + +uint32_t xive_alloc_ipi_irqs(uint32_t chip_id, uint32_t count, uint32_t align) +{ + struct proc_chip *chip = get_chip(chip_id); + struct xive *x; + uint32_t base, i; + + assert(chip); + assert(is_pow2(align)); + + x = chip->xive; + assert(x); + + /* Allocate the IPI interrupts */ + base = x->int_ipi_top + (align - 1); + base &= ~(align - 1); + if (base >= x->int_hw_bot) { + xive_err(x, + "IPI alloc request for %d interrupts aligned to %d failed\n", + count, align); + return XIVE_IRQ_ERROR; + } + x->int_ipi_top = base + count; + + /* Initialize the corresponding IVT entries to sane defaults, + * IE entry is valid, not routed and masked, EQ data is set + * to the GIRQ number. + */ + for (i = 0; i < count; i++) { + struct xive_ive *ive = xive_get_ive(x, base + i); + + ive->w = IVE_VALID | IVE_MASKED | SETFIELD(IVE_EQ_DATA, 0ul, base + i); + } + + return base; +} + +uint64_t xive_get_notify_port(uint32_t chip_id, uint32_t ent) +{ + struct proc_chip *chip = get_chip(chip_id); + struct xive *x; + uint32_t offset = 0; + + assert(chip); + x = chip->xive; + assert(x); + + /* This is where we can assign a different HW queue to a different + * source by offsetting into the cache lines of the notify port + * + * For now we keep it very basic, this will have to be looked at + * again on real HW with some proper performance analysis. + * + * Here's what Florian says on the matter: + * + * << + * The first 2k of the notify port page can all be used for PCIe triggers + * + * However the idea would be that we try to use the first 4 cache lines to + * balance the PCIe Interrupt requests to use the least used snoop buses + * (we went from 2 to 4 snoop buses for P9). snoop 0 is heavily used + * (I think TLBIs are using that in addition to the normal addresses), + * snoop 3 is used for all Int commands, so I think snoop 2 (CL 2 in the + * page) is the least used overall. So we probably should that one for + * the Int commands from PCIe. + * + * In addition, our EAS cache supports hashing to provide "private" cache + * areas for the PHBs in the shared 1k EAS cache. This allows e.g. to avoid + * that one "thrashing" PHB thrashes the EAS cache for everyone, or provide + * a PHB with a private area that would allow high cache hits in case of a + * device using very few interrupts. The hashing is based on the offset within + * the cache line. So using that, you can e.g. set the EAS cache up so that + * IPIs use 512 entries, the x16 PHB uses 256 entries and the x8 PHBs 128 + * entries each - or IPIs using all entries and sharing with PHBs, so PHBs + * would use 512 entries and 256 entries respectively. + * + * This is a tuning we would probably do later in the lab, but as a "prep" + * we should set up the different PHBs such that they are using different + * 8B-aligned offsets within the cache line, so e.g. + * PH4_0 addr 0x100 (CL 2 DW0 + * PH4_1 addr 0x108 (CL 2 DW1) + * PH4_2 addr 0x110 (CL 2 DW2) + * etc. 
+ * >> + */ + switch(ent) { + case XIVE_HW_SRC_PHBn(0): + offset = 0x100; + break; + case XIVE_HW_SRC_PHBn(1): + offset = 0x108; + break; + case XIVE_HW_SRC_PHBn(2): + offset = 0x110; + break; + case XIVE_HW_SRC_PHBn(3): + offset = 0x118; + break; + case XIVE_HW_SRC_PHBn(4): + offset = 0x120; + break; + case XIVE_HW_SRC_PHBn(5): + offset = 0x128; + break; + case XIVE_HW_SRC_PSI: + offset = 0x130; + break; + default: + assert(false); + return 0; + } + + /* Notify port is the second page of the IC BAR */ + return ((uint64_t)x->ic_base) + (1ul << x->ic_shift) + offset; +} + +static void init_one_xive(struct dt_node *np) +{ + struct xive *x; + struct proc_chip *chip; + + x = zalloc(sizeof(struct xive)); + assert(x); + x->xscom_base = dt_get_address(np, 0, NULL); + x->chip_id = dt_get_chip_id(np); + x->x_node = np; + init_lock(&x->lock); + + chip = get_chip(x->chip_id); + assert(chip); + xive_dbg(x, "Initializing...\n"); + chip->xive = x; + + /* Base interrupt numbers and allocator init */ + x->int_base = BLKIDX_TO_GIRQ(x->chip_id, 0); + x->int_max = x->int_base + MAX_INT_ENTRIES; + x->int_hw_bot = x->int_max; + x->int_ipi_top = x->int_base; + + /* Make sure we never hand out "2" as it's reserved for XICS emulation + * IPI returns. Generally start handing out at 0x10 + */ + if (x->int_ipi_top < 0x10) + x->int_ipi_top = 0x10; + + xive_dbg(x, "Handling interrupts [%08x..%08x]\n", x->int_base, x->int_max - 1); + + /* System dependant values that must be set before BARs */ + //xive_regwx(x, CQ_CFG_PB_GEN, xx); + //xive_regwx(x, CQ_MSGSND, xx); + + /* Verify the BARs are initialized and if not, setup a default layout */ + xive_check_update_bars(x); + + /* Some basic global inits such as page sizes etc... */ + if (!xive_config_init(x)) + goto fail; + + /* Configure the set translations for MMIO */ + if (!xive_setup_set_xlate(x)) + goto fail; + + /* Dump some MMIO registers for diagnostics */ + xive_dump_mmio(x); + + /* Pre-allocate a number of tables */ + if (!xive_prealloc_tables(x)) + goto fail; + + /* Configure local tables in VSDs (forward ports will be handled later) */ + if (!xive_set_local_tables(x)) + goto fail; + + /* Create a device-tree node for Linux use */ + xive_create_mmio_dt_node(x); + + return; + fail: + xive_err(x, "Initialization failed...\n"); + + /* Should this be fatal ? */ + //assert(false); +} + +/* + * XICS emulation + */ +struct xive_cpu_state { + struct xive *xive; + void *tm_ring1; + uint32_t vp_blk; + uint32_t vp_idx; + struct lock lock; + uint8_t cppr; + uint8_t mfrr; + uint8_t pending; + uint8_t prev_cppr; + uint32_t *eqbuf; + uint32_t eqidx; + uint32_t eqmsk; + uint8_t eqgen; + void *eqmmio; + uint32_t ipi_irq; +}; + +void xive_cpu_callin(struct cpu_thread *cpu) +{ + struct xive_cpu_state *xs = cpu->xstate; + struct proc_chip *chip = get_chip(cpu->chip_id); + struct xive *x = chip->xive; + uint32_t fc, bit; + + if (!xs) + return; + + /* First enable us in PTER. We currently assume that the + * PIR bits can be directly used to index in PTER. 
That might + * need to be verified + */ + + /* Get fused core number */ + fc = (cpu->pir >> 3) & 0xf; + /* Get bit in register */ + bit = cpu->pir & 0x3f; + /* Get which register to access */ + if (fc < 8) + xive_regw(x, PC_THREAD_EN_REG0_SET, PPC_BIT(bit)); + else + xive_regw(x, PC_THREAD_EN_REG1_SET, PPC_BIT(bit)); + + /* Set CPPR to 0 */ + out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_CPPR, 0); + + /* Set VT to 1 */ + out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_WORD2, 0x80); + + xive_cpu_dbg(cpu, "Initialized interrupt management area\n"); + + /* Now unmask the IPI */ + xive_ipi_init(x, GIRQ_TO_IDX(xs->ipi_irq)); +} + +static void xive_init_cpu(struct cpu_thread *c) +{ + struct proc_chip *chip = get_chip(c->chip_id); + struct xive *x = chip->xive; + struct xive_cpu_state *xs; + + if (!x) + return; + + /* First, if we are the first CPU of an EX pair, we need to + * setup the special BAR + */ + /* XXX This is very P9 specific ... */ + if ((c->pir & 0x7) == 0) { + uint64_t xa, val; + int64_t rc; + + xive_cpu_dbg(c, "Setting up special BAR\n"); + xa = XSCOM_ADDR_P9_EX(pir_to_core_id(c->pir), P9X_EX_NCU_SPEC_BAR); + printf("NCU_SPEC_BAR_XA=%08llx\n", xa); + val = (uint64_t)x->tm_base | P9X_EX_NCU_SPEC_BAR_ENABLE; + if (x->tm_shift == 16) + val |= P9X_EX_NCU_SPEC_BAR_256K; + rc = xscom_write(c->chip_id, xa, val); + if (rc) { + xive_cpu_err(c, "Failed to setup NCU_SPEC_BAR\n"); + /* XXXX what do do now ? */ + } + } + + /* Initialize the state structure */ + c->xstate = xs = local_alloc(c->chip_id, sizeof(struct xive_cpu_state), 1); + assert(xs); + xs->xive = x; + + init_lock(&xs->lock); + + xs->vp_blk = PIR2VP_BLK(c->pir); + xs->vp_idx = PIR2VP_IDX(c->pir); + xs->cppr = 0; + xs->mfrr = 0xff; + + /* XXX Find the one eq buffer associated with the VP, for now same BLK/ID */ + xs->eqbuf = xive_get_eq_buf(x, xs->vp_blk, xs->vp_idx); + xs->eqidx = 0; + xs->eqmsk = (0x10000/4) - 1; + xs->eqgen = false; + xs->eqmmio = x->eq_mmio + xs->vp_idx * 0x20000; + assert(xs->eqbuf); + + /* Shortcut to TM HV ring */ + xs->tm_ring1 = x->tm_base + (1u << x->tm_shift); + + /* Allocate an IPI */ + xs->ipi_irq = xive_alloc_ipi_irqs(c->chip_id, 1, 1); + xive_set_eq_info(xs->ipi_irq, c->pir, 0x7); + xive_cpu_dbg(c, "CPU IPI is irq %08x\n", xs->ipi_irq); +} + +bool xive_get_eq_info(uint32_t isn, uint32_t *out_target, uint8_t *out_prio) +{ + struct xive_ive *ive; + struct xive *x, *eq_x; + struct xive_eq *eq; + uint32_t eq_blk, eq_idx; + uint32_t vp_blk, vp_idx; + uint32_t prio, server; + + /* Find XIVE on which the IVE resides */ + x = xive_from_isn(isn); + if (!x) + return false; + /* Grab the IVE */ + ive = xive_get_ive(x, isn); + if (!ive) + return false; + if (!(ive->w & IVE_VALID)) { + xive_err(x, "ISN %x lead to invalid IVE !\n", isn); + return false; + } + /* Find the EQ and its xive instance */ + eq_blk = GETFIELD(IVE_EQ_BLOCK, ive->w); + eq_idx = GETFIELD(IVE_EQ_INDEX, ive->w); + eq_x = xive_from_vc_blk(eq_blk); + if (!eq_x) { + xive_err(x, "Can't find controller for EQ BLK %d\n", eq_blk); + return false; + } + eq = xive_get_eq(eq_x, eq_idx); + if (!eq) { + xive_err(eq_x, "Can't locate EQ %d\n", eq_idx); + return false; + } + /* XXX Check valid and format 0 */ + + /* No priority conversion, return the actual one ! 
*/ + prio = GETFIELD(EQ_W7_F0_PRIORITY, eq->w7); + if (out_prio) + *out_prio = prio; + + vp_blk = GETFIELD(EQ_W6_NVT_BLOCK, eq->w6); + vp_idx = GETFIELD(EQ_W6_NVT_INDEX, eq->w6); + server = VP2PIR(vp_blk, vp_idx); + + if (out_target) + *out_target = server; + xive_vdbg(eq_x, "EQ info for ISN %x: prio=%d, server=0x%x (VP %x/%x)\n", + isn, prio, server, vp_blk, vp_idx); + return true; +} + +static inline bool xive_eq_for_target(uint32_t target, uint8_t prio __unused, + uint32_t *eq_blk, uint32_t *eq_idx) +{ + uint32_t vp_blk = PIR2VP_BLK(target); + uint32_t vp_idx = PIR2VP_IDX(target); + + /* XXX We currently have EQ BLK/IDX == VP BLK/IDX. This will change + * when we support priorities. + */ + if (eq_blk) + *eq_blk = vp_blk; + if (eq_idx) + *eq_idx = vp_idx; + return true; +} + +bool xive_set_eq_info(uint32_t isn, uint32_t target, uint8_t prio) +{ + struct xive *x; + struct xive_ive *ive; + uint32_t eq_blk, eq_idx; + + /* Find XIVE on which the IVE resides */ + x = xive_from_isn(isn); + if (!x) + return false; + /* Grab the IVE */ + ive = xive_get_ive(x, isn); + if (!ive) + return false; + if (!(ive->w & IVE_VALID)) { + xive_err(x, "ISN %x lead to invalid IVE !\n", isn); + return false; + } + + /* Are we masking ? */ + if (prio == 0xff) { + /* Masking, just set the M bit */ + ive->w |= IVE_MASKED; + + xive_vdbg(x, "ISN %x masked !\n", isn); + } else { + uint64_t new_ive; + + /* Unmasking, re-target the IVE. First find the EQ + * correponding to the target + */ + if (!xive_eq_for_target(target, prio, &eq_blk, &eq_idx)) { + xive_err(x, "Can't find EQ for target/prio 0x%x/%d\n", + target, prio); + return false; + } + + /* Try to update it atomically to avoid an intermediary + * stale state + */ + new_ive = ive->w & ~IVE_MASKED; + new_ive = SETFIELD(IVE_EQ_BLOCK, new_ive, eq_blk); + new_ive = SETFIELD(IVE_EQ_INDEX, new_ive, eq_idx); + sync(); + ive->w = new_ive; + + xive_vdbg(x,"ISN %x routed to eq %x/%x IVE=%016llx !\n", + isn, eq_blk, eq_idx, new_ive); + } + + /* Scrub IVE from cache */ + xive_ivc_scrub(x, x->chip_id, GIRQ_TO_IDX(isn)); + + return true; +} + + +static uint32_t xive_read_eq(struct xive_cpu_state *xs, bool just_peek) +{ + uint32_t cur; + + xive_cpu_vdbg(this_cpu(), " EQ %s... IDX=%x MSK=%x G=%d\n", + just_peek ? "peek" : "read", + xs->eqidx, xs->eqmsk, xs->eqgen); + cur = xs->eqbuf[xs->eqidx]; + xive_cpu_vdbg(this_cpu(), " cur: %08x [%08x %08x %08x ...]\n", cur, + xs->eqbuf[(xs->eqidx + 1) & xs->eqmsk], + xs->eqbuf[(xs->eqidx + 2) & xs->eqmsk], + xs->eqbuf[(xs->eqidx + 3) & xs->eqmsk]); + if ((cur >> 31) == xs->eqgen) + return 0; + if (!just_peek) { + xs->eqidx = (xs->eqidx + 1) & xs->eqmsk; + if (xs->eqidx == 0) + xs->eqgen = !xs->eqgen; + } + return cur & 0x00ffffff; +} + +static uint8_t xive_sanitize_cppr(uint8_t cppr) +{ + if (cppr == 0xff || cppr == 0) + return cppr; + else + return 7; +} + +static inline uint8_t opal_xive_check_pending(struct xive_cpu_state *xs, + uint8_t cppr) +{ + uint8_t mask = (cppr > 7) ? 
0xff : ((1 << cppr) - 1); + + return xs->pending & mask; +} + +static int64_t opal_xive_eoi(uint32_t xirr) +{ + struct cpu_thread *c = this_cpu(); + struct xive_cpu_state *xs = c->xstate; + uint32_t isn = xirr & 0x00ffffff; + uint8_t cppr, irqprio; + struct xive *src_x; + bool special_ipi = false; + + if (!xs) + return OPAL_INTERNAL_ERROR; + + xive_cpu_vdbg(c, "EOI xirr=%08x cur_cppr=%d\n", xirr, xs->cppr); + + /* Limit supported CPPR values from OS */ + cppr = xive_sanitize_cppr(xirr >> 24); + + lock(&xs->lock); + + /* Snapshor current CPPR, it's assumed to be our IRQ priority */ + irqprio = xs->cppr; + + /* If this was our magic IPI, convert to IRQ number */ + if (isn == 2) { + isn = xs->ipi_irq; + special_ipi = true; + xive_cpu_vdbg(c, "User EOI for IPI !\n"); + } + + /* First check if we have stuff in that queue. If we do, don't bother with + * doing an EOI on the EQ. Just mark that priority pending, we'll come + * back later. + * + * If/when supporting multiple queues we would have to check them all + * in ascending prio order up to the passed-in CPPR value (exclusive). + */ + if (xive_read_eq(xs, true)) { + xive_cpu_vdbg(c, " isn %08x, skip, queue non empty\n", xirr); + xs->pending |= 1 << irqprio; + } +#ifndef EQ_ALWAYS_NOTIFY + else { + uint8_t eoi_val; + + /* Perform EQ level EOI. Only one EQ for now ... + * + * Note: We aren't doing an actual EOI. Instead we are clearing + * both P and Q and will re-check the queue if Q was set. + */ + eoi_val = in_8(xs->eqmmio + 0xc00); + xive_cpu_vdbg(c, " isn %08x, eoi_val=%02x\n", xirr, eoi_val); + + /* Q was set ? Check EQ again after doing a sync to ensure + * ordering. + */ + if (eoi_val & 1) { + sync(); + if (xive_read_eq(xs, true)) + xs->pending |= 1 << irqprio; + } + } +#endif + + /* Perform source level EOI if it's a HW interrupt, otherwise, + * EOI ourselves + */ + src_x = xive_from_isn(isn); + if (src_x) { + uint32_t idx = GIRQ_TO_IDX(isn); + + /* Is it an IPI ? */ + if (idx < src_x->int_ipi_top) { + xive_vdbg(src_x, "EOI of IDX %x in IPI range\n", idx); + xive_ipi_eoi(src_x, idx); + + /* It was a special IPI, check mfrr and eventually + * re-trigger. We check against the new CPPR since + * we are about to update the HW. + */ + if (special_ipi && xs->mfrr < cppr) + xive_ipi_trigger(src_x, idx); + } else { + xive_vdbg(src_x, "EOI of IDX %x in EXT range\n", idx); + irq_source_eoi(isn); + } + } else { + xive_cpu_err(c, " EOI unknown ISN %08x\n", isn); + } + + /* Finally restore CPPR */ + xs->cppr = cppr; + out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_CPPR, cppr); + + xive_cpu_vdbg(c, " pending=0x%x cppr=%d\n", xs->pending, cppr); + + unlock(&xs->lock); + + /* Return whether something is pending that is suitable for + * delivery considering the new CPPR value. This can be done + * without lock as these fields are per-cpu. + */ + return opal_xive_check_pending(xs, cppr); +} + +static int64_t opal_xive_get_xirr(uint32_t *out_xirr, bool just_poll) +{ + struct cpu_thread *c = this_cpu(); + struct xive_cpu_state *xs = c->xstate; + uint16_t ack; + uint8_t active, old_cppr; + + if (!xs) + return OPAL_INTERNAL_ERROR; + if (!out_xirr) + return OPAL_PARAMETER; + + *out_xirr = 0; + + lock(&xs->lock); + + /* + * Due to the need to fetch multiple interrupts from the EQ, we + * need to play some tricks. + * + * The "pending" byte in "xs" keeps track of the priorities that + * are known to have stuff to read (currently we only use one). + * + * It is set in EOI and cleared when consumed here. We don't bother + * looking ahead here, EOI will do it. 
+ * + * We do need to still do an ACK every time in case a higher prio + * exception occurred (though we don't do prio yet... right ? still + * let's get the basic design right !). + * + * Note that if we haven't found anything via ack, but did find + * something in the queue, we must also raise CPPR back. + */ + + /* Perform the HV Ack cycle */ + if (just_poll) + ack = in_be64(xs->tm_ring1 + TM_QW3_HV_PHYS) >> 48; + else + ack = in_be16(xs->tm_ring1 + TM_SPC_ACK_HV_REG); + xive_cpu_vdbg(c, "get_xirr,%s=%04x\n", just_poll ? "POLL" : "ACK", ack); + + /* Capture the old CPPR which we will return with the interrupt */ + old_cppr = xs->cppr; + + switch(GETFIELD(TM_QW3_NSR_HE, (ack >> 8))) { + case TM_QW3_NSR_HE_NONE: + break; + case TM_QW3_NSR_HE_POOL: + break; + case TM_QW3_NSR_HE_PHYS: + /* Mark pending and keep track of the CPPR update */ + if (!just_poll) { + xs->cppr = ack & 0xff; + xs->pending |= 1 << xs->cppr; + } + break; + case TM_QW3_NSR_HE_LSI: + break; + } + + /* Calculate "active" lines as being the pending interrupts + * masked by the "old" CPPR + */ + active = opal_xive_check_pending(xs, old_cppr); + + xive_cpu_vdbg(c, " cppr=%d->%d pending=0x%x active=%x\n", + old_cppr, xs->cppr, xs->pending, active); + if (active) { + /* Find highest pending */ + uint8_t prio = ffs(active) - 1; + uint32_t val; + + /* XXX Use "p" to select queue */ + val = xive_read_eq(xs, just_poll); + + /* Convert to magic IPI if needed */ + if (val == xs->ipi_irq) + val = 2; + + *out_xirr = (old_cppr << 24) | val; + + /* If we are polling, that's it */ + if (just_poll) + goto skip; + + /* Clear the pending bit. EOI will set it again if needed. We + * could check the queue but that's not really critical here. + */ + xs->pending &= ~(1 << prio); + + /* There should always be an interrupt here I think, unless + * some race occurred, but let's be safe. If we don't find + * anything, we just return. 
+ */ + if (!val) + goto skip; + + xive_cpu_vdbg(c, " found irq, prio=%d\n", prio); + + /* We could have fetched a pending interrupt left over + * by a previous EOI, so the CPPR might need adjusting + */ + if (xs->cppr > prio) { + xs->cppr = prio; + out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_CPPR, prio); + xive_cpu_vdbg(c, " adjusted CPPR\n"); + } + } + skip: + + xive_cpu_vdbg(c, " returning XIRR=%08x, pending=0x%x\n", + *out_xirr, xs->pending); + + unlock(&xs->lock); + + return OPAL_SUCCESS; +} + +static int64_t opal_xive_set_cppr(uint8_t cppr) +{ + struct cpu_thread *c = this_cpu(); + struct xive_cpu_state *xs = c->xstate; + + /* Limit supported CPPR values */ + cppr = xive_sanitize_cppr(cppr); + + if (!xs) + return OPAL_INTERNAL_ERROR; + xive_cpu_vdbg(c, "CPPR setting to %d\n", cppr); + + lock(&xs->lock); + c->xstate->cppr = cppr; + out_8(xs->tm_ring1 + TM_QW3_HV_PHYS + TM_CPPR, cppr); + + unlock(&xs->lock); + + return OPAL_SUCCESS; +} + +static int64_t opal_xive_set_mfrr(uint32_t cpu, uint8_t mfrr) +{ + struct cpu_thread *c = find_cpu_by_server(cpu); + struct xive_cpu_state *xs; + uint8_t old_mfrr; + + if (!c) + return OPAL_PARAMETER; + xs = c->xstate; + if (!xs) + return OPAL_INTERNAL_ERROR; + + lock(&xs->lock); + old_mfrr = xs->mfrr; + xive_cpu_vdbg(c, " Setting MFRR to %x, old is %x\n", mfrr, old_mfrr); + xs->mfrr = mfrr; + if (old_mfrr > mfrr && mfrr < xs->cppr) + xive_ipi_trigger(xs->xive, GIRQ_TO_IDX(xs->ipi_irq)); + unlock(&xs->lock); + + return OPAL_SUCCESS; +} + +void init_xive(void) +{ + struct dt_node *np; + struct proc_chip *chip; + struct cpu_thread *cpu; + + /* Look for xive nodes and do basic inits */ + dt_for_each_compatible(dt_root, np, "ibm,power9-xive-x") { + init_one_xive(np); + } + + /* Some inits must be done after all xive have been created + * such as setting up the forwarding ports + */ + for_each_chip(chip) { + if (chip->xive) + late_init_one_xive(chip->xive); + } + + /* Initialize XICS emulation per-cpu structures */ + for_each_cpu(cpu) { + xive_init_cpu(cpu); + } + + /* Calling boot CPU */ + xive_cpu_callin(this_cpu()); + + /* Register XICS emulation calls */ + opal_register(OPAL_INT_GET_XIRR, opal_xive_get_xirr, 2); + opal_register(OPAL_INT_SET_CPPR, opal_xive_set_cppr, 1); + opal_register(OPAL_INT_EOI, opal_xive_eoi, 1); + opal_register(OPAL_INT_SET_MFRR, opal_xive_set_mfrr, 2); +} diff --git a/include/chip.h b/include/chip.h index 4541368..1f31a13 100644 --- a/include/chip.h +++ b/include/chip.h @@ -106,6 +106,7 @@ struct dt_node; struct centaur_chip; struct mfsi; +struct xive; /* Chip type */ enum proc_chip_type { @@ -201,6 +202,9 @@ struct proc_chip { /* Used by hw/fsi-master.c */ struct mfsi *fsi_masters; + + /* Used by hw/xive.c */ + struct xive *xive; }; extern uint32_t pir_to_chip_id(uint32_t pir); diff --git a/include/cpu.h b/include/cpu.h index 587cd52..59923d5 100644 --- a/include/cpu.h +++ b/include/cpu.h @@ -41,6 +41,7 @@ enum cpu_thread_state { }; struct cpu_job; +struct xive_cpu_state; struct cpu_thread { uint32_t pir; @@ -86,6 +87,9 @@ struct cpu_thread { /* Mask to indicate thread id in core. 
diff --git a/include/xive.h b/include/xive.h
new file mode 100644
index 0000000..c3bd33a
--- /dev/null
+++ b/include/xive.h
@@ -0,0 +1,378 @@
+/* Copyright 2016 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __XIVE_H__
+#define __XIVE_H__
+
+/* IC register offsets */
+#define CQ_SWI_CMD_HIST 0x020
+#define CQ_SWI_CMD_POLL 0x028
+#define CQ_SWI_CMD_BCAST 0x030
+#define CQ_SWI_CMD_ASSIGN 0x038
+#define CQ_SWI_CMD_BLK_UPD 0x040
+#define CQ_SWI_RSP 0x048
+#define X_CQ_CFG_PB_GEN 0x0a
+#define CQ_CFG_PB_GEN 0x050
+#define X_CQ_IC_BAR 0x10
+#define X_CQ_MSGSND 0x0b
+#define CQ_MSGSND 0x058
+#define CQ_CNPM_SEL 0x078
+#define CQ_IC_BAR 0x080
+#define CQ_IC_BAR_VALID PPC_BIT(0)
+#define CQ_IC_BAR_64K PPC_BIT(1)
+#define X_CQ_TM1_BAR 0x12
+#define CQ_TM1_BAR 0x90
+#define X_CQ_TM2_BAR 0x14
+#define CQ_TM2_BAR 0x0a0
+#define CQ_TM_BAR_VALID PPC_BIT(0)
+#define CQ_TM_BAR_64K PPC_BIT(1)
+#define X_CQ_PC_BAR 0x16
+#define CQ_PC_BAR 0x0b0
+#define CQ_PC_BAR_VALID PPC_BIT(0)
+#define X_CQ_PC_BARM 0x17
+#define CQ_PC_BARM 0x0b8
+#define CQ_PC_BARM_MASK PPC_BITMASK(26,38)
+#define X_CQ_VC_BAR 0x18
+#define CQ_VC_BAR 0x0c0
+#define CQ_VC_BAR_VALID PPC_BIT(0)
+#define X_CQ_VC_BARM 0x19
+#define CQ_VC_BARM 0x0c8
+#define CQ_VC_BARM_MASK PPC_BITMASK(21,37)
+#define X_CQ_TAR 0x1e
+#define CQ_TAR 0x0f0
+#define CQ_TAR_TBL_AUTOINC PPC_BIT(0)
+#define CQ_TAR_TSEL_BLK PPC_BIT(12)
+#define CQ_TAR_TSEL_MIG PPC_BIT(13)
+#define CQ_TAR_TSEL_VDT PPC_BIT(14)
+#define CQ_TAR_TSEL_EDT PPC_BIT(15)
+#define X_CQ_TDR 0x1f
+#define CQ_TDR 0x0f8
+#define X_CQ_PBI_CTL 0x20
+#define CQ_PBI_CTL 0x100
+#define CQ_PBI_PC_64K PPC_BIT(5)
+#define CQ_PBI_VC_64K PPC_BIT(6)
+#define CQ_PBI_LNX_TRIG PPC_BIT(7)
+#define CQ_PBO_CTL 0x108
+#define CQ_AIB_CTL 0x110
+#define X_CQ_RST_CTL 0x23
+#define CQ_RST_CTL 0x118
+
+/* PC LBS1 register offsets */
+#define X_PC_TCTXT_CFG 0x100
+#define PC_TCTXT_CFG 0x400
+#define PC_TCTXT_CFG_BLKGRP_EN PPC_BIT(0)
+#define PC_TCTXT_CFG_HARD_CHIPID_BLK PPC_BIT(8)
+#define X_PC_THREAD_EN_REG0 0x108
+#define PC_THREAD_EN_REG0 0x440
+#define X_PC_THREAD_EN_REG0_SET 0x109
+#define PC_THREAD_EN_REG0_SET 0x448
+#define X_PC_THREAD_EN_REG0_CLR 0x10a
+#define PC_THREAD_EN_REG0_CLR 0x450
+#define X_PC_THREAD_EN_REG1 0x10c
+#define PC_THREAD_EN_REG1 0x460
+#define X_PC_THREAD_EN_REG1_SET 0x10d
+#define PC_THREAD_EN_REG1_SET 0x468
+#define X_PC_THREAD_EN_REG1_CLR 0x10e
+#define PC_THREAD_EN_REG1_CLR 0x470
+#define X_PC_GLOBAL_CONFIG 0x110
+#define PC_GLOBAL_CONFIG 0x480
+#define PC_GCONF_INDIRECT PPC_BIT(32)
+#define X_PC_VSD_TABLE_ADDR 0x111
+#define PC_VSD_TABLE_ADDR 0x488
+#define X_PC_VSD_TABLE_DATA 0x112
+#define PC_VSD_TABLE_DATA 0x490
+
+/* PC LBS2 register offsets */
+#define X_PC_VPC_CACHE_ENABLE 0x161
+#define PC_VPC_CACHE_ENABLE 0x708
+#define PC_VPC_CACHE_EN_MASK PPC_BITMASK(0,31)
+#define X_PC_VPC_SCRUB_TRIG 0x162
+#define PC_VPC_SCRUB_TRIG 0x710
+#define X_PC_VPC_SCRUB_MASK 0x163
+#define PC_VPC_SCRUB_MASK 0x718
+#define PC_SCRUB_VALID PPC_BIT(0)
+#define PC_SCRUB_WANT_DISABLE PPC_BIT(1)
+#define PC_SCRUB_WANT_INVAL PPC_BIT(2)
+#define PC_SCRUB_BLOCK_ID PPC_BITMASK(27,31)
+#define PC_SCRUB_OFFSET PPC_BITMASK(45,63)
+
+/* VC0 register offsets */
+#define X_VC_GLOBAL_CONFIG 0x200
+#define VC_GLOBAL_CONFIG 0x800
+#define VC_GCONF_INDIRECT PPC_BIT(32)
+#define X_VC_VSD_TABLE_ADDR 0x201
+#define VC_VSD_TABLE_ADDR 0x808
+#define X_VC_VSD_TABLE_DATA 0x202
+#define VC_VSD_TABLE_DATA 0x810
+#define VC_IVE_ISB_BLOCK_MODE 0x818
+#define VC_EQD_BLOCK_MODE 0x820
+#define VC_VPS_BLOCK_MODE 0x828
+#define VC_IRQ_CONFIG_IPI 0x840
+#define VC_IRQ_CONFIG_HW 0x848
+#define VC_IRQ_CONFIG_CASCADE1 0x850
+#define VC_IRQ_CONFIG_CASCADE2 0x858
+#define VC_IRQ_CONFIG_REDIST 0x860
+#define VC_IRQ_CONFIG_IPI_CASC 0x868
+#define X_VC_AT_MACRO_KILL 0x23e
+#define VC_AT_MACRO_KILL 0x8b0
+#define X_VC_AT_MACRO_KILL_MASK 0x23f
+#define VC_AT_MACRO_KILL_MASK 0x8b8
+#define VC_KILL_VALID PPC_BIT(0)
+#define VC_KILL_TYPE PPC_BITMASK(14,15)
+#define VC_KILL_IRQ 0
+#define VC_KILL_IVC 1
+#define VC_KILL_SBC 2
+#define VC_KILL_EQD 3
+#define VC_KILL_BLOCK_ID PPC_BITMASK(27,31)
+#define VC_KILL_OFFSET PPC_BITMASK(48,60)
+#define X_VC_EQC_CACHE_ENABLE 0x211
+#define VC_EQC_CACHE_ENABLE 0x908
+#define VC_EQC_CACHE_EN_MASK PPC_BITMASK(0,15)
+#define X_VC_EQC_SCRUB_TRIG 0x212
+#define VC_EQC_SCRUB_TRIG 0x910
+#define X_VC_EQC_SCRUB_MASK 0x213
+#define VC_EQC_SCRUB_MASK 0x918
+#define X_VC_IVC_SCRUB_TRIG 0x222
+#define VC_IVC_SCRUB_TRIG 0x990
+#define X_VC_IVC_SCRUB_MASK 0x223
+#define VC_IVC_SCRUB_MASK 0x998
+#define X_VC_SBC_SCRUB_TRIG 0x232
+#define VC_SBC_SCRUB_TRIG 0xa10
+#define X_VC_SBC_SCRUB_MASK 0x233
+#define VC_SBC_SCRUB_MASK 0xa18
+#define VC_SCRUB_VALID PPC_BIT(0)
+#define VC_SCRUB_WANT_DISABLE PPC_BIT(1)
+#define VC_SCRUB_WANT_INVAL PPC_BIT(2) /* EQC and SBC only */
+#define VC_SCRUB_BLOCK_ID PPC_BITMASK(28,31)
+#define VC_SCRUB_OFFSET PPC_BITMASK(41,63)
+#define X_VC_IVC_CACHE_ENABLE 0x221
+#define VC_IVC_CACHE_ENABLE 0x988
+#define VC_IVC_CACHE_EN_MASK PPC_BITMASK(0,15)
+#define X_VC_SBC_CACHE_ENABLE 0x231
+#define VC_SBC_CACHE_ENABLE 0xa08
+#define VC_SBC_CACHE_EN_MASK PPC_BITMASK(0,15)
+#define VC_SBC_CONFIG 0xa20
+
+/* VC1 register offsets */
+
+/* VSD Table address register definitions (shared) */
+#define VST_ADDR_AUTOINC PPC_BIT(0)
+#define VST_TABLE_SELECT PPC_BITMASK(13,15)
+#define VST_TSEL_IVT 0
+#define VST_TSEL_SBE 1
+#define VST_TSEL_EQDT 2
+#define VST_TSEL_VPDT 3
+#define VST_TSEL_IRQ 4 /* VC only */
+#define VST_TABLE_OFFSET PPC_BITMASK(27,31)
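[Editor's note] The VST_* fields just above work together with the VC_VSD_TABLE_ADDR / VC_VSD_TABLE_DATA pair (and their PC equivalents): software first selects which table and block it is addressing through the address register, then writes the block's VSD through the data register. An illustrative sketch of that sequence follows; xive_regw() is an assumed MMIO write helper, not a name taken from this patch:

/* Editor's sketch, not patch code: point the IVT for block "blk"
 * at the 64-bit VSD "vsd" via the VC engine's address/data pair.
 */
static inline void example_write_vsd(struct xive *x, uint32_t blk,
				     uint64_t vsd)
{
	xive_regw(x, VC_VSD_TABLE_ADDR,
		  SETFIELD(VST_TABLE_SELECT, 0ull, VST_TSEL_IVT) |
		  SETFIELD(VST_TABLE_OFFSET, 0ull, blk));
	xive_regw(x, VC_VSD_TABLE_DATA, vsd);
}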
+/* Bits in a VSD entry.
+ *
+ * Note: the address is naturally aligned, we don't use a PPC_BITMASK,
+ * but just a mask to apply to the address before OR'ing it in.
+ */
+#define VSD_MODE PPC_BITMASK(0,1)
+#define VSD_MODE_SHARED 1
+#define VSD_MODE_EXCLUSIVE 2
+#define VSD_MODE_FORWARD 3
+#define VSD_ADDRESS_MASK 0x0ffffffffffff000ull
+#define VSD_MIGRATION_REG PPC_BITMASK(52,55)
+#define VSD_INDIRECT PPC_BIT(56)
+#define VSD_TSIZE PPC_BITMASK(59,63)
+
+/*
+ * TM registers are special, see below
+ */
+
+/* TM register offsets */
+#define TM_QW0_USER 0x000 /* All rings */
+#define TM_QW1_OS 0x010 /* Ring 0..2 */
+#define TM_QW2_HV_POOL 0x020 /* Ring 0..1 */
+#define TM_QW3_HV_PHYS 0x030 /* Ring 0..1 */
+
+/* Byte offsets inside a QW QW0 QW1 QW2 QW3 */
+#define TM_NSR 0x0 /* + + - + */
+#define TM_CPPR 0x1 /* - + - + */
+#define TM_IPB 0x2 /* - + + + */
+#define TM_LSMFB 0x3 /* - + + + */
+#define TM_ACK_CNT 0x4 /* - + - - */
+#define TM_INC 0x5 /* - + - + */
+#define TM_AGE 0x6 /* - + - + */
+#define TM_PIPR 0x7 /* - + - + */
+
+/* QW word 2 contains the valid bit at the top and other fields
+ * depending on the QW
+ */
+#define TM_WORD2 0x8
+#define TM_QW0W2_VU PPC_BIT32(0)
+#define TM_QW0W2_LOGIC_SERV PPC_BITMASK32(1,31) /* XXX 2,31 ? */
+#define TM_QW1W2_VO PPC_BIT32(0)
+#define TM_QW1W2_OS_CAM PPC_BITMASK32(8,31)
+#define TM_QW2W2_VP PPC_BIT32(0)
+#define TM_QW2W2_POOL_CAM PPC_BITMASK32(8,31)
+#define TM_QW3W2_VT PPC_BIT32(0)
+#define TM_QW3W2_LP PPC_BIT32(6)
+#define TM_QW3W2_LE PPC_BIT32(7)
+#define TM_QW3W2_T PPC_BIT32(31)
+
+/* In addition to normal loads to "peek" and writes (only when invalid)
+ * using 4- and 8-byte accesses, the above registers support these
+ * "special" byte operations:
+ *
+ * - Byte load from QW0[NSR] - User level NSR (EBB)
+ * - Byte store to QW0[NSR] - User level NSR (EBB)
+ * - Byte load/store to QW1[CPPR] and QW3[CPPR] - CPPR access
+ * - Byte load from QW3[TM_WORD2] - Read VT||00000||LP||LE on thrd 0
+ * otherwise VT||0000000
+ * - Byte store to QW3[TM_WORD2] - Set VT bit (and LP/LE if present)
+ *
+ * Then we have all these "special" CI ops at these offsets that trigger
+ * all sorts of side effects:
+ */
+#define TM_SPC_ACK_EBB 0x800 /* Load8 ack EBB to reg */
+#define TM_SPC_ACK_OS_REG 0x810 /* Load16 ack OS irq to reg */
+#define TM_SPC_ACK_OS_EL 0xc10 /* Store8 ack OS irq to even line */
+#define TM_SPC_PUSH_USR_CTX 0x808 /* Store32 Push/Validate user context */
+#define TM_SPC_PULL_USR_CTX 0x808 /* Load32 Pull/Invalidate user context */
+#define TM_SPC_PULL_USR_CTX_OL 0xc08 /* Store8 Pull/Inval usr ctx to odd line */
+#define TM_SPC_SET_OS_PENDING 0x812 /* Store8 Set OS irq pending bit */
+#define TM_SPC_ACK_HV_REG 0x830 /* Load16 ack HV irq to reg */
+#define TM_SPC_ACK_HV_POOL_EL 0xc20 /* Store8 ack HV evt pool to even line */
+#define TM_SPC_ACK_HV_EL 0xc30 /* Store8 ack HV irq to even line */
+/* XXX more... */
+
+/* NSR fields for the various QW ack types */
+#define TM_QW0_NSR_EB PPC_BIT8(0)
+#define TM_QW1_NSR_EO PPC_BIT8(0)
+#define TM_QW3_NSR_HE PPC_BITMASK8(0,1)
+#define TM_QW3_NSR_HE_NONE 0
+#define TM_QW3_NSR_HE_POOL 1
+#define TM_QW3_NSR_HE_PHYS 2
+#define TM_QW3_NSR_HE_LSI 3
+#define TM_QW3_NSR_I PPC_BIT8(2)
+#define TM_QW3_NSR_GRP_LVL PPC_BITMASK8(3,7)
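[Editor's note] The HV ack performed by opal_xive_get_xirr() earlier in this patch is a Load16 from TM_SPC_ACK_HV_REG: the high byte is the NSR, whose HE field says which ring the event was for, and the low byte is the acked priority that becomes the new CPPR. A small illustrative decoder, not part of the patch, using the fields above:

/* Editor's sketch, not patch code: split a TM_SPC_ACK_HV_REG result */
static inline void example_decode_hv_ack(uint16_t ack,
					 uint8_t *he, uint8_t *cppr)
{
	*he = GETFIELD(TM_QW3_NSR_HE, (uint64_t)(ack >> 8));
	*cppr = ack & 0xff;
}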
+
+/*
+ * Definition of the XIVE in-memory tables
+ */
+
+/* IVE/EAS
+ *
+ * One per interrupt source. Targets the interrupt to a given EQ
+ * and provides the corresponding logical interrupt number (EQ data).
+ */
+struct xive_ive {
+ /* Use a single 64-bit definition to make it easier to
+ * perform atomic updates
+ */
+ uint64_t w;
+#define IVE_VALID PPC_BIT(0)
+#define IVE_EQ_BLOCK PPC_BITMASK(4,7) /* Destination EQ block# */
+#define IVE_EQ_INDEX PPC_BITMASK(8,31) /* Destination EQ index */
+#define IVE_MASKED PPC_BIT(32) /* Masked */
+#define IVE_EQ_DATA PPC_BITMASK(33,63) /* Data written to the EQ */
+};
+
+/* EQ */
+struct xive_eq {
+ uint32_t w0;
+#define EQ_W0_VALID PPC_BIT32(0)
+#define EQ_W0_ENQUEUE PPC_BIT32(1)
+#define EQ_W0_UCOND_NOTIFY PPC_BIT32(2)
+#define EQ_W0_BACKLOG PPC_BIT32(3)
+#define EQ_W0_PRECL_ESC_CTL PPC_BIT32(4)
+#define EQ_W0_ESCALATE_CTL PPC_BIT32(5)
+#define EQ_W0_END_OF_INTR PPC_BIT32(6)
+#define EQ_W0_QSIZE PPC_BITMASK32(12,15)
+#define EQ_QSIZE_4K 0
+#define EQ_QSIZE_64K 4
+#define EQ_W0_HWDEP PPC_BITMASK32(24,31)
+ uint32_t w1;
+#define EQ_W1_ESn PPC_BITMASK32(0,1)
+#define EQ_W1_ESe PPC_BITMASK32(2,3)
+#define EQ_W1_GENERATION PPC_BIT32(9)
+#define EQ_W1_PAGE_OFF PPC_BITMASK32(10,31)
+ uint32_t w2;
+#define EQ_W2_MIGRATION_REG PPC_BITMASK32(0,3)
+#define EQ_W2_OP_DESC_HI PPC_BITMASK32(4,31)
+ uint32_t w3;
+#define EQ_W3_OP_DESC_LO PPC_BITMASK32(0,31)
+ uint32_t w4;
+#define EQ_W4_ESC_EQ_BLOCK PPC_BITMASK32(4,7)
+#define EQ_W4_ESC_EQ_INDEX PPC_BITMASK32(8,31)
+ uint32_t w5;
+#define EQ_W5_ESC_EQ_DATA PPC_BITMASK32(1,31)
+ uint32_t w6;
+#define EQ_W6_FORMAT_BIT PPC_BIT32(8)
+#define EQ_W6_NVT_BLOCK PPC_BITMASK32(9,12)
+#define EQ_W6_NVT_INDEX PPC_BITMASK32(13,31)
+ uint32_t w7;
+#define EQ_W7_F0_IGNORE PPC_BIT32(0)
+#define EQ_W7_F0_BLK_GROUPING PPC_BIT32(1)
+#define EQ_W7_F0_PRIORITY PPC_BITMASK32(8,15)
+#define EQ_W7_F1_WAKEZ PPC_BIT32(0)
+#define EQ_W7_F1_LOG_SERVER_ID PPC_BITMASK32(1,31)
+};
+
+/* VP */
+struct xive_vp {
+ uint32_t w0;
+#define VP_W0_VALID PPC_BIT32(0)
+ uint32_t w1;
+ uint32_t w2;
+ uint32_t w3;
+ uint32_t w4;
+ uint32_t w5;
+ uint32_t w6;
+ uint32_t w7;
+ uint32_t w8;
+#define VP_W8_GRP_VALID PPC_BIT32(0)
+ uint32_t w9;
+ uint32_t wa;
+ uint32_t wb;
+ uint32_t wc;
+ uint32_t wd;
+ uint32_t we;
+ uint32_t wf;
+};
+
+/* Internal APIs exposed to other modules */
+
+/* IRQ allocators return this on failure */
+#define XIVE_IRQ_ERROR 0xffffffff
+
+void init_xive(void);
+
+/* Allocate a chunk of HW sources */
+uint32_t xive_alloc_hw_irqs(uint32_t chip_id, uint32_t count, uint32_t align);
+/* Allocate a chunk of IPI sources */
+uint32_t xive_alloc_ipi_irqs(uint32_t chip_id, uint32_t count, uint32_t align);
+
+/* Get notification port address for a HW source entity */
+#define XIVE_HW_SRC_PHBn(__n) (__n)
+#define XIVE_HW_SRC_PSI 8
+
+uint64_t xive_get_notify_port(uint32_t chip_id, uint32_t ent);
+
+bool xive_get_eq_info(uint32_t isn, uint32_t *out_target, uint8_t *out_prio);
+bool xive_set_eq_info(uint32_t isn, uint32_t target, uint8_t prio);
+
+void xive_cpu_callin(struct cpu_thread *cpu);
+
+#endif /* __XIVE_H__ */
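[Editor's note] To close, a hedged illustration of how these table entries compose. The values (block 0, index 42, data 0x123) are made up; SETFIELD is skiboot's existing bitfield helper, and the patch's real IVT setup lives in hw/xive.c:

/* Editor's sketch, not patch code: an EAS/IVT entry routing one
 * source, unmasked, to EQ (block 0, index 42) with logical number
 * 0x123 written into the queue on delivery.
 */
struct xive_ive ive = {
	.w = IVE_VALID |
	     SETFIELD(IVE_EQ_BLOCK, 0ull, 0) |
	     SETFIELD(IVE_EQ_INDEX, 0ull, 42) |
	     SETFIELD(IVE_EQ_DATA, 0ull, 0x123),
};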