path: root/core
Diffstat (limited to 'core')
-rw-r--r--  core/Makefile.inc  12
-rw-r--r--  core/affinity.c  132
-rw-r--r--  core/backtrace.c  41
-rw-r--r--  core/chip.c  85
-rw-r--r--  core/console.c  334
-rw-r--r--  core/cpu.c  672
-rw-r--r--  core/device.c  791
-rw-r--r--  core/exceptions.c  529
-rw-r--r--  core/fast-reboot.c  346
-rw-r--r--  core/fdt.c  208
-rw-r--r--  core/flash-nvram.c  76
-rw-r--r--  core/hostservices.c  826
-rw-r--r--  core/init.c  687
-rw-r--r--  core/interrupts.c  332
-rw-r--r--  core/lock.c  125
-rw-r--r--  core/malloc.c  84
-rw-r--r--  core/mem_region.c  956
-rw-r--r--  core/nvram.c  248
-rw-r--r--  core/opal-msg.c  167
-rw-r--r--  core/opal.c  308
-rw-r--r--  core/pci-opal.c  666
-rw-r--r--  core/pci.c  1388
-rw-r--r--  core/platform.c  78
-rw-r--r--  core/relocate.c  65
-rw-r--r--  core/test/Makefile.check  29
-rw-r--r--  core/test/run-device.c  118
-rw-r--r--  core/test/run-malloc-speed.c  94
-rw-r--r--  core/test/run-malloc.c  144
-rw-r--r--  core/test/run-mem_region.c  250
-rw-r--r--  core/test/run-mem_region_init.c  179
-rw-r--r--  core/test/run-mem_region_release_unused.c  177
-rw-r--r--  core/test/run-mem_region_release_unused_noalloc.c  159
-rw-r--r--  core/test/run-msg.c  256
-rw-r--r--  core/test/run-trace.c  386
-rw-r--r--  core/test/stubs.c  43
-rw-r--r--  core/timebase.c  67
-rw-r--r--  core/trace.c  244
-rw-r--r--  core/utils.c  59
-rw-r--r--  core/vpd.c  211
39 files changed, 11572 insertions, 0 deletions
diff --git a/core/Makefile.inc b/core/Makefile.inc
new file mode 100644
index 0000000..843ce05
--- /dev/null
+++ b/core/Makefile.inc
@@ -0,0 +1,12 @@
+# -*-Makefile-*-
+
+SUBDIRS += core
+CORE_OBJS = relocate.o console.o backtrace.o init.o chip.o mem_region.o
+CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o
+CORE_OBJS += timebase.o opal-msg.o pci.o pci-opal.o fast-reboot.o
+CORE_OBJS += device.o exceptions.o trace.o affinity.o vpd.o
+CORE_OBJS += hostservices.o platform.o nvram.o flash-nvram.o
+CORE=core/built-in.o
+
+$(CORE): $(CORE_OBJS:%=core/%)
+
diff --git a/core/affinity.c b/core/affinity.c
new file mode 100644
index 0000000..d5eea82
--- /dev/null
+++ b/core/affinity.c
@@ -0,0 +1,132 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ *
+ * We currently construct our associativity properties as follows:
+ *
+ * - For "chip" devices (bridges, memory, ...), 4 entries:
+ *
+ * - CCM node ID
+ * - HW card ID
+ * - HW module ID
+ * - Chip ID
+ *
+ * The information is constructed based on the chip ID which (unlike
+ * pHyp) is our HW chip ID (aka "XSCOM" chip ID). We use it to retrieve
+ * the other properties from the corresponding chip/xscom node in the
+ * device-tree. If those properties are absent, 0 is used.
+ *
+ * - For "core" devices, we add a 5th entry:
+ *
+ * - Core ID
+ *
+ * Here too, we do not use the "cooked" HW processor ID from HDAT but
+ * instead use the real HW core ID which is basically the interrupt
+ * server number of thread 0 on that core.
+ *
+ *
+ * The ibm,associativity-reference-points property is currently set to
+ * 4,4 indicating that the chip ID is our only reference point. This
+ * should be extended to encompass the node IDs eventually.
+ */
+#include <skiboot.h>
+#include <opal.h>
+#include <device.h>
+#include <console.h>
+#include <trace.h>
+#include <chip.h>
+#include <cpu.h>
+#include <affinity.h>
+
+static uint32_t get_chip_node_id(struct proc_chip *chip)
+{
+ /* If the xscom node has an ibm,ccm-node-id property, use it */
+ if (dt_has_node_property(chip->devnode, "ibm,ccm-node-id", NULL))
+ return dt_prop_get_u32(chip->devnode, "ibm,ccm-node-id");
+
+ /*
+ * Else use the 3 top bits of the chip ID which should be
+ * the node on both P7 and P8
+ */
+ return chip->id >> 3;
+}
+
+void add_associativity_ref_point(void)
+{
+ int ref2 = 0x4;
+
+ /*
+ * Note about our use of reference points:
+ *
+ * Linux currently supports two levels of NUMA. We use the first
+ * reference point for the node ID and the second reference point
+ * for a second level of affinity. We always use the chip ID (4)
+ * for the first reference point.
+ *
+ * Choosing the second level of affinity is model specific
+ * unfortunately. Current POWER8E models should use the DCM
+ * as a second level of NUMA.
+ *
+ * If there is a way to obtain this information from the FSP
+ * that would be ideal, but for now hardwire our POWER8E setting.
+ */
+ if (PVR_TYPE(mfspr(SPR_PVR)) == PVR_TYPE_P8E)
+ ref2 = 0x3;
+
+ dt_add_property_cells(opal_node, "ibm,associativity-reference-points",
+ 0x4, ref2);
+}
+
+void add_chip_dev_associativity(struct dt_node *dev)
+{
+ uint32_t chip_id = dt_get_chip_id(dev);
+ struct proc_chip *chip = get_chip(chip_id);
+ uint32_t hw_cid, hw_mid;
+
+ if (!chip)
+ return;
+
+ hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
+ hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
+
+ dt_add_property_cells(dev, "ibm,associativity", 4,
+ get_chip_node_id(chip),
+ hw_cid, hw_mid, chip_id);
+}
+
+void add_core_associativity(struct cpu_thread *cpu)
+{
+ struct proc_chip *chip = get_chip(cpu->chip_id);
+ uint32_t hw_cid, hw_mid, core_id;
+
+ if (!chip)
+ return;
+
+ if (proc_gen == proc_gen_p7)
+ core_id = (cpu->pir >> 2) & 0x7;
+ else if (proc_gen == proc_gen_p8)
+ core_id = (cpu->pir >> 3) & 0xf;
+ else
+ return;
+
+ hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0);
+ hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0);
+
+ dt_add_property_cells(cpu->node, "ibm,associativity", 5,
+ get_chip_node_id(chip),
+ hw_cid, hw_mid, chip->id, core_id);
+}
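
A stand-alone sketch, for illustration only, of the associativity cell layout described in the header comment above, using invented IDs (ccm node 1, hw card 2, hw module 0, chip 0x8, core 0xc). In the firmware these values come from the xscom node properties; the sketch just prints the 4-entry chip form and the 5-entry core form side by side.

/* Illustration only: associativity cell layout with made-up IDs. */
#include <stdio.h>

int main(void)
{
	unsigned int chip_assoc[5] = { 4, 1, 2, 0, 0x8 };	/* chip device: 4 entries */
	unsigned int core_assoc[6] = { 5, 1, 2, 0, 0x8, 0xc };	/* core: extra core ID */

	printf("ibm,associativity (chip) = <%u %u %u %u 0x%x>\n",
	       chip_assoc[0], chip_assoc[1], chip_assoc[2],
	       chip_assoc[3], chip_assoc[4]);
	printf("ibm,associativity (core) = <%u %u %u %u 0x%x 0x%x>\n",
	       core_assoc[0], core_assoc[1], core_assoc[2],
	       core_assoc[3], core_assoc[4], core_assoc[5]);
	return 0;
}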
diff --git a/core/backtrace.c b/core/backtrace.c
new file mode 100644
index 0000000..3439db0
--- /dev/null
+++ b/core/backtrace.c
@@ -0,0 +1,41 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <skiboot.h>
+#include <processor.h>
+#include <cpu.h>
+
+void backtrace(void)
+{
+ unsigned int pir = mfspr(SPR_PIR);
+ unsigned long *sp;
+ unsigned long *bottom, *top;
+
+ /* Check if there's a __builtin_something instead */
+ asm("mr %0,1" : "=r" (sp));
+
+ bottom = cpu_stack_bottom(pir);
+ top = cpu_stack_top(pir);
+
+ /* XXX Handle SMP */
+ fprintf(stderr, "CPU %08x Backtrace:\n", pir);
+ while(sp > bottom && sp < top) {
+ fprintf(stderr, " S: %016lx R: %016lx\n",
+ (unsigned long)sp, sp[2]);
+ sp = (unsigned long *)sp[0];
+ }
+}
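
backtrace() above walks the ppc64 back chain: the word at sp[0] points to the caller's frame and sp[2] holds the saved LR, which is what gets printed. Below is a minimal stand-alone sketch of the same walk over a fabricated frame chain; the frame contents and LR values are invented for illustration.

/* Stand-alone sketch of a back-chain walk over fake frames. */
#include <stdio.h>

int main(void)
{
	/* Three fake frames: [0] = back chain, [2] = saved LR */
	unsigned long f2[3] = { 0, 0, 0x3000 };
	unsigned long f1[3] = { (unsigned long)f2, 0, 0x2000 };
	unsigned long f0[3] = { (unsigned long)f1, 0, 0x1000 };
	unsigned long *sp = f0;

	while (sp) {
		printf(" S: %016lx R: %016lx\n", (unsigned long)sp, sp[2]);
		sp = (unsigned long *)sp[0];	/* follow the back chain */
	}
	return 0;
}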
diff --git a/core/chip.c b/core/chip.c
new file mode 100644
index 0000000..e6eb81c
--- /dev/null
+++ b/core/chip.c
@@ -0,0 +1,85 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <skiboot.h>
+#include <chip.h>
+#include <device.h>
+
+static struct proc_chip *chips[MAX_CHIPS];
+
+uint32_t pir_to_chip_id(uint32_t pir)
+{
+ if (proc_gen == proc_gen_p8)
+ return P8_PIR2GCID(pir);
+ else
+ return P7_PIR2GCID(pir);
+}
+
+uint32_t pir_to_core_id(uint32_t pir)
+{
+ if (proc_gen == proc_gen_p8)
+ return P8_PIR2COREID(pir);
+ else
+ return P7_PIR2COREID(pir);
+}
+
+uint32_t pir_to_thread_id(uint32_t pir)
+{
+ if (proc_gen == proc_gen_p8)
+ return P8_PIR2THREADID(pir);
+ else
+ return P7_PIR2THREADID(pir);
+}
+
+struct proc_chip *next_chip(struct proc_chip *chip)
+{
+ unsigned int i;
+
+ for (i = chip ? (chip->id + 1) : 0; i < MAX_CHIPS; i++)
+ if (chips[i])
+ return chips[i];
+ return NULL;
+}
+
+
+struct proc_chip *get_chip(uint32_t chip_id)
+{
+ return chips[chip_id];
+}
+
+void init_chips(void)
+{
+ struct proc_chip *chip;
+ struct dt_node *xn;
+
+ /* We walk the chips based on xscom nodes in the tree */
+ dt_for_each_compatible(dt_root, xn, "ibm,xscom") {
+ uint32_t id = dt_get_chip_id(xn);
+
+ assert(id < MAX_CHIPS);
+
+ chip = zalloc(sizeof(struct proc_chip));
+ assert(chip);
+ chip->id = id;
+ chip->devnode = xn;
+ chips[id] = chip;
+ chip->dbob_id = dt_prop_get_u32_def(xn, "ibm,dbob-id",
+ 0xffffffff);
+ chip->pcid = dt_prop_get_u32_def(xn, "ibm,proc-chip-id",
+ 0xffffffff);
+ };
+}
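
A hypothetical caller, not part of this patch, showing how the iterator above is meant to be used: next_chip(NULL) returns the first populated chips[] entry and next_chip(chip) the following one, so a plain for loop visits every discovered chip exactly once. It assumes skiboot's chip.h declarations as added above.

static void print_all_chips(void)
{
	struct proc_chip *chip;

	/* Walk every chip discovered by init_chips() */
	for (chip = next_chip(NULL); chip; chip = next_chip(chip))
		printf("CHIP: id 0x%x dbob 0x%x pcid 0x%x\n",
		       chip->id, chip->dbob_id, chip->pcid);
}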
diff --git a/core/console.c b/core/console.c
new file mode 100644
index 0000000..b291b1b
--- /dev/null
+++ b/core/console.c
@@ -0,0 +1,334 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Console IO routine for use by libc
+ *
+ * fd is the classic posix 0,1,2 (stdin, stdout, stderr)
+ */
+#include <skiboot.h>
+#include <unistd.h>
+#include <console.h>
+#include <opal.h>
+#include <device.h>
+#include <processor.h>
+#include <cpu.h>
+
+static char *con_buf = (char *)INMEM_CON_START;
+static size_t con_in;
+static size_t con_out;
+static bool con_wrapped;
+static struct con_ops *con_driver;
+
+struct lock con_lock = LOCK_UNLOCKED;
+
+/* This is mapped via TCEs so we keep it alone in a page */
+struct memcons memcons __section(".data.memcons") = {
+ .magic = MEMCONS_MAGIC,
+ .obuf_phys = INMEM_CON_START,
+ .ibuf_phys = INMEM_CON_START + INMEM_CON_OUT_LEN,
+ .obuf_size = INMEM_CON_OUT_LEN,
+ .ibuf_size = INMEM_CON_IN_LEN,
+};
+
+bool dummy_console_enabled(void)
+{
+#ifdef FORCE_DUMMY_CONSOLE
+ return true;
+#else
+ return dt_has_node_property(dt_chosen,
+ "sapphire,enable-dummy-console", NULL);
+#endif
+}
+
+void force_dummy_console(void)
+{
+ dt_add_property(dt_chosen, "sapphire,enable-dummy-console", NULL, 0);
+}
+
+#ifdef MAMBO_CONSOLE
+static void mambo_write(const char *buf, size_t count)
+{
+#define SIM_WRITE_CONSOLE_CODE 0
+ register int c asm("r3") = 0; /* SIM_WRITE_CONSOLE_CODE */
+ register unsigned long a1 asm("r4") = (unsigned long)buf;
+ register unsigned long a2 asm("r5") = count;
+ register unsigned long a3 asm("r6") = 0;
+ asm volatile (".long 0x000eaeb0":"=r" (c):"r"(c), "r"(a1), "r"(a2),
+ "r"(a3));
+}
+#else
+static void mambo_write(const char *buf __unused, size_t count __unused) { }
+#endif /* MAMBO_CONSOLE */
+
+void clear_console(void)
+{
+ memset(con_buf, 0, INMEM_CON_LEN);
+}
+
+/*
+ * Flush the console buffer into the driver, returns true
+ * if there is more to go
+ */
+bool __flush_console(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+ size_t req, len = 0;
+ static bool in_flush, more_flush;
+
+ /* Is there anything to flush ? Bail out early if not */
+ if (con_in == con_out || !con_driver)
+ return false;
+
+ /*
+ * Console flushing is suspended on this CPU, typically because
+ * some critical locks are held that would potentially cause a
+ * flush to deadlock
+ */
+ if (cpu->con_suspend) {
+ cpu->con_need_flush = true;
+ return false;
+ }
+ cpu->con_need_flush = false;
+
+ /*
+ * We must call the underlying driver with the console lock
+ * dropped otherwise we get some deadlocks if anything down
+ * that path tries to printf() something.
+ *
+ * So instead what we do is we keep a static in_flush flag
+ * set/released with the lock held, which is used to prevent
+ * concurrent attempts at flushing the same chunk of buffer
+ * by other processors.
+ */
+ if (in_flush) {
+ more_flush = true;
+ return false;
+ }
+ in_flush = true;
+
+ do {
+ more_flush = false;
+ if (con_out > con_in) {
+ req = INMEM_CON_OUT_LEN - con_out;
+ unlock(&con_lock);
+ len = con_driver->write(con_buf + con_out, req);
+ lock(&con_lock);
+ con_out = (con_out + len) % INMEM_CON_OUT_LEN;
+ if (len < req)
+ goto bail;
+ }
+ if (con_out < con_in) {
+ unlock(&con_lock);
+ len = con_driver->write(con_buf + con_out,
+ con_in - con_out);
+ lock(&con_lock);
+ con_out = (con_out + len) % INMEM_CON_OUT_LEN;
+ }
+ } while(more_flush);
+bail:
+ in_flush = false;
+ return con_out != con_in;
+}
+
+bool flush_console(void)
+{
+ bool ret;
+
+ lock(&con_lock);
+ ret = __flush_console();
+ unlock(&con_lock);
+
+ return ret;
+}
+
+static void inmem_write(char c)
+{
+ uint32_t opos;
+
+ if (!c)
+ return;
+ con_buf[con_in++] = c;
+ if (con_in >= INMEM_CON_OUT_LEN) {
+ con_in = 0;
+ con_wrapped = true;
+ }
+
+ /*
+ * We must always re-generate memcons.out_pos because
+ * under some circumstances, the console script will
+ * use a broken putmemproc that does RMW on the full
+ * 8 bytes containing out_pos and in_prod, thus corrupting
+ * out_pos
+ */
+ opos = con_in;
+ if (con_wrapped)
+ opos |= MEMCONS_OUT_POS_WRAP;
+ lwsync();
+ memcons.out_pos = opos;
+
+ /* If head reaches tail, push tail around & drop chars */
+ if (con_in == con_out)
+ con_out = (con_in + 1) % INMEM_CON_OUT_LEN;
+}
+
+static size_t inmem_read(char *buf, size_t req)
+{
+ size_t read = 0;
+ char *ibuf = (char *)memcons.ibuf_phys;
+
+ while (req && memcons.in_prod != memcons.in_cons) {
+ *(buf++) = ibuf[memcons.in_cons];
+ lwsync();
+ memcons.in_cons = (memcons.in_cons + 1) % INMEM_CON_IN_LEN;
+ req--;
+ read++;
+ }
+ return read;
+}
+
+static void write_char(char c)
+{
+ mambo_write(&c, 1);
+ inmem_write(c);
+}
+
+ssize_t write(int fd __unused, const void *buf, size_t count)
+{
+ /* We use recursive locking here as we can get called
+ * from a fairly deep debug path
+ */
+ bool need_unlock = lock_recursive(&con_lock);
+ const char *cbuf = buf;
+
+ while(count--) {
+ char c = *(cbuf++);
+ if (c == 10)
+ write_char(13);
+ write_char(c);
+ }
+
+ __flush_console();
+
+ if (need_unlock)
+ unlock(&con_lock);
+
+ return count;
+}
+
+ssize_t read(int fd __unused, void *buf, size_t req_count)
+{
+ bool need_unlock = lock_recursive(&con_lock);
+ size_t count = 0;
+
+ if (con_driver && con_driver->read)
+ count = con_driver->read(buf, req_count);
+ if (!count)
+ count = inmem_read(buf, req_count);
+ if (need_unlock)
+ unlock(&con_lock);
+ return count;
+}
+
+void set_console(struct con_ops *driver)
+{
+ con_driver = driver;
+ if (driver)
+ flush_console();
+}
+
+void memcons_add_properties(void)
+{
+ uint64_t addr = (u64)&memcons;
+
+ dt_add_property_cells(opal_node, "ibm,opal-memcons",
+ hi32(addr), lo32(addr));
+}
+
+/*
+ * Default OPAL console provided if nothing else overrides it
+ */
+static int64_t dummy_console_write(int64_t term_number, int64_t *length,
+ const uint8_t *buffer)
+{
+ if (term_number != 0)
+ return OPAL_PARAMETER;
+ write(0, buffer, *length);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_CONSOLE_WRITE, dummy_console_write, 3);
+
+static int64_t dummy_console_write_buffer_space(int64_t term_number,
+ int64_t *length)
+{
+ if (term_number != 0)
+ return OPAL_PARAMETER;
+ if (length)
+ *length = INMEM_CON_OUT_LEN;
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_CONSOLE_WRITE_BUFFER_SPACE, dummy_console_write_buffer_space, 2);
+
+static int64_t dummy_console_read(int64_t term_number, int64_t *length,
+ uint8_t *buffer)
+{
+ if (term_number != 0)
+ return OPAL_PARAMETER;
+ *length = read(0, buffer, *length);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_CONSOLE_READ, dummy_console_read, 3);
+
+static void dummy_console_poll(void *data __unused)
+{
+ bool uart_has_data;
+
+ lock(&con_lock);
+ uart_has_data = uart_console_poll();
+
+ if (uart_has_data || memcons.in_prod != memcons.in_cons)
+ opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT,
+ OPAL_EVENT_CONSOLE_INPUT);
+ else
+ opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, 0);
+ unlock(&con_lock);
+
+}
+
+void dummy_console_add_nodes(void)
+{
+ struct dt_node *con, *consoles;
+
+ consoles = dt_new(opal_node, "consoles");
+ assert(consoles);
+ dt_add_property_cells(consoles, "#address-cells", 1);
+ dt_add_property_cells(consoles, "#size-cells", 0);
+
+ con = dt_new_addr(consoles, "serial", 0);
+ assert(con);
+ dt_add_property_string(con, "compatible", "ibm,opal-console-raw");
+ dt_add_property_cells(con, "#write-buffer-size", INMEM_CON_OUT_LEN);
+ dt_add_property_cells(con, "reg", 0);
+ dt_add_property_string(con, "device_type", "serial");
+
+ dt_add_property_string(dt_chosen, "linux,stdout-path",
+ "/ibm,opal/consoles/serial@0");
+
+ opal_add_poller(dummy_console_poll, NULL);
+}
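
The in-memory console is a simple ring: inmem_write() above advances con_in, sets the wrap flag once it passes the end of the buffer, folds that flag into memcons.out_pos, and pushes con_out forward (dropping the oldest byte) when the head catches the tail. Below is a stand-alone sketch of that wrap logic with an invented 8-byte buffer and an invented wrap bit, for illustration only.

/* Stand-alone sketch of the output ring used by inmem_write(). */
#include <stdio.h>
#include <stdint.h>

#define BUF_LEN		8		/* made up for illustration */
#define OUT_POS_WRAP	0x80000000u	/* made up for illustration */

static char buf[BUF_LEN];
static uint32_t in, out, out_pos;
static int wrapped;

static void ring_putc(char c)
{
	buf[in++] = c;
	if (in >= BUF_LEN) {
		in = 0;
		wrapped = 1;
	}
	/* Re-generate the consumer-visible position every time */
	out_pos = in | (wrapped ? OUT_POS_WRAP : 0);
	if (in == out)			/* head caught the tail: drop oldest */
		out = (in + 1) % BUF_LEN;
}

int main(void)
{
	const char *msg = "hello, world";

	while (*msg)
		ring_putc(*msg++);
	printf("in=%u out=%u out_pos=0x%08x\n", in, out, out_pos);
	return 0;
}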
diff --git a/core/cpu.c b/core/cpu.c
new file mode 100644
index 0000000..0eea946
--- /dev/null
+++ b/core/cpu.c
@@ -0,0 +1,672 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * TODO: Index array by PIR to be able to catch them easily
+ * from assembly such as machine checks etc...
+ */
+#include <skiboot.h>
+#include <cpu.h>
+#include <fsp.h>
+#include <device.h>
+#include <opal.h>
+#include <stack.h>
+#include <trace.h>
+#include <affinity.h>
+#include <chip.h>
+#include <timebase.h>
+#include <ccan/str/str.h>
+#include <ccan/container_of/container_of.h>
+
+/* The cpu_threads array is static and indexed by PIR in
+ * order to speed up lookup from asm entry points
+ */
+struct cpu_stack {
+ union {
+ uint8_t stack[STACK_SIZE];
+ struct cpu_thread cpu;
+ };
+} __align(STACK_SIZE);
+
+static struct cpu_stack *cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
+unsigned int cpu_thread_count;
+unsigned int cpu_max_pir;
+struct cpu_thread *boot_cpu;
+static struct lock reinit_lock = LOCK_UNLOCKED;
+
+unsigned long cpu_secondary_start __force_data = 0;
+
+struct cpu_job {
+ struct list_node link;
+ void (*func)(void *data);
+ void *data;
+ bool complete;
+ bool no_return;
+};
+
+/* attribute const as cpu_stacks is constant. */
+void __attrconst *cpu_stack_bottom(unsigned int pir)
+{
+ return (void *)&cpu_stacks[pir] + sizeof(struct cpu_thread);
+}
+
+void __attrconst *cpu_stack_top(unsigned int pir)
+{
+ /* This is the top of the MC stack which is above the normal
+ * stack, which means a SP between cpu_stack_bottom() and
+ * cpu_stack_top() can either be a normal stack pointer or
+ * a Machine Check stack pointer
+ */
+ return (void *)&cpu_stacks[pir] + STACK_SIZE - STACK_TOP_GAP;
+}
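
cpu_stacks is a flat array of STACK_SIZE-sized blocks starting at CPU_STACKS_BASE, with each hardware thread's cpu_thread overlaid at the bottom of its own block, so a PIR maps to its structure with a single multiply. A stand-alone sketch of that address arithmetic, using made-up base and size values:

/* Illustration only: PIR-indexed stack/cpu_thread layout. */
#include <stdio.h>

#define FAKE_STACKS_BASE 0x01000000ul	/* invented for illustration */
#define FAKE_STACK_SIZE  0x4000ul	/* invented for illustration */

int main(void)
{
	unsigned int pir;

	for (pir = 0; pir < 4; pir++)
		printf("PIR 0x%02x -> cpu_thread at 0x%08lx\n",
		       pir, FAKE_STACKS_BASE + pir * FAKE_STACK_SIZE);
	return 0;
}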
+
+struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
+ void (*func)(void *data), void *data,
+ bool no_return)
+{
+ struct cpu_job *job;
+
+ if (!cpu_is_available(cpu)) {
+ prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n",
+ cpu->pir);
+ return NULL;
+ }
+
+ job = zalloc(sizeof(struct cpu_job));
+ if (!job)
+ return NULL;
+ job->func = func;
+ job->data = data;
+ job->complete = false;
+ job->no_return = no_return;
+
+ if (cpu != this_cpu()) {
+ lock(&cpu->job_lock);
+ list_add_tail(&cpu->job_queue, &job->link);
+ unlock(&cpu->job_lock);
+ } else {
+ func(data);
+ job->complete = true;
+ }
+
+ /* XXX Add poking of CPU with interrupt */
+
+ return job;
+}
+
+bool cpu_poll_job(struct cpu_job *job)
+{
+ lwsync();
+ return job->complete;
+}
+
+void cpu_wait_job(struct cpu_job *job, bool free_it)
+{
+ if (!job)
+ return;
+
+ while(!job->complete) {
+ /* Handle mbox if master CPU */
+ if (this_cpu() == boot_cpu)
+ fsp_poll();
+ else
+ smt_low();
+ lwsync();
+ }
+ lwsync();
+ smt_medium();
+
+ if (free_it)
+ free(job);
+}
+
+void cpu_free_job(struct cpu_job *job)
+{
+ if (!job)
+ return;
+
+ assert(job->complete);
+ free(job);
+}
+
+void cpu_process_jobs(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+ struct cpu_job *job;
+ void (*func)(void *);
+ void *data;
+
+ sync();
+ if (list_empty(&cpu->job_queue))
+ return;
+
+ lock(&cpu->job_lock);
+ while (true) {
+ bool no_return;
+
+ if (list_empty(&cpu->job_queue))
+ break;
+ smt_medium();
+ job = list_pop(&cpu->job_queue, struct cpu_job, link);
+ if (!job)
+ break;
+ func = job->func;
+ data = job->data;
+ no_return = job->no_return;
+ unlock(&cpu->job_lock);
+ if (no_return)
+ free(job);
+ func(data);
+ lock(&cpu->job_lock);
+ if (!no_return) {
+ lwsync();
+ job->complete = true;
+ }
+ }
+ unlock(&cpu->job_lock);
+}
+
+struct dt_node *get_cpu_node(u32 pir)
+{
+ struct cpu_thread *t = find_cpu_by_pir(pir);
+
+ return t ? t->node : NULL;
+}
+
+/* This only covers primary, active cpus */
+struct cpu_thread *find_cpu_by_chip_id(u32 chip_id)
+{
+ struct cpu_thread *t;
+
+ for_each_available_cpu(t) {
+ if (t->is_secondary)
+ continue;
+ if (t->chip_id == chip_id)
+ return t;
+ }
+ return NULL;
+}
+
+struct cpu_thread *find_cpu_by_node(struct dt_node *cpu)
+{
+ struct cpu_thread *t;
+
+ for_each_available_cpu(t) {
+ if (t->node == cpu)
+ return t;
+ }
+ return NULL;
+}
+
+struct cpu_thread *find_cpu_by_pir(u32 pir)
+{
+ if (pir > cpu_max_pir)
+ return NULL;
+ return &cpu_stacks[pir].cpu;
+}
+
+struct cpu_thread *find_cpu_by_server(u32 server_no)
+{
+ struct cpu_thread *t;
+
+ for_each_cpu(t) {
+ if (t->server_no == server_no)
+ return t;
+ }
+ return NULL;
+}
+
+struct cpu_thread *next_cpu(struct cpu_thread *cpu)
+{
+ struct cpu_stack *s = container_of(cpu, struct cpu_stack, cpu);
+ unsigned int index;
+
+ if (cpu == NULL)
+ index = 0;
+ else
+ index = s - cpu_stacks + 1;
+ for (; index <= cpu_max_pir; index++) {
+ cpu = &cpu_stacks[index].cpu;
+ if (cpu->state != cpu_state_no_cpu)
+ return cpu;
+ }
+ return NULL;
+}
+
+struct cpu_thread *first_cpu(void)
+{
+ return next_cpu(NULL);
+}
+
+struct cpu_thread *next_available_cpu(struct cpu_thread *cpu)
+{
+ do {
+ cpu = next_cpu(cpu);
+ } while(cpu && !cpu_is_available(cpu));
+
+ return cpu;
+}
+
+struct cpu_thread *first_available_cpu(void)
+{
+ return next_available_cpu(NULL);
+}
+
+struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core,
+ u32 chip_id)
+{
+ do {
+ core = next_cpu(core);
+ } while(core && (!cpu_is_available(core) ||
+ core->chip_id != chip_id ||
+ core->is_secondary));
+ return core;
+}
+
+struct cpu_thread *first_available_core_in_chip(u32 chip_id)
+{
+ return next_available_core_in_chip(NULL, chip_id);
+}
+
+uint32_t cpu_get_core_index(struct cpu_thread *cpu)
+{
+ return pir_to_core_id(cpu->pir);
+}
+
+void cpu_remove_node(const struct cpu_thread *t)
+{
+ struct dt_node *i;
+
+ /* Find this cpu node */
+ dt_for_each_node(dt_root, i) {
+ const struct dt_property *p;
+
+ if (!dt_has_node_property(i, "device_type", "cpu"))
+ continue;
+ p = dt_find_property(i, "ibm,pir");
+ if (dt_property_get_cell(p, 0) == t->pir) {
+ dt_free(i);
+ return;
+ }
+ }
+ prerror("CPU: Could not find cpu node %i to remove!\n", t->pir);
+ abort();
+}
+
+void cpu_disable_all_threads(struct cpu_thread *cpu)
+{
+ unsigned int i;
+
+ for (i = 0; i <= cpu_max_pir; i++) {
+ struct cpu_thread *t = &cpu_stacks[i].cpu;
+
+ if (t->primary == cpu->primary)
+ t->state = cpu_state_disabled;
+ }
+
+ /* XXX Do something to actually stop the core */
+}
+
+static void init_cpu_thread(struct cpu_thread *t,
+ enum cpu_thread_state state,
+ unsigned int pir)
+{
+ init_lock(&t->job_lock);
+ list_head_init(&t->job_queue);
+ t->state = state;
+ t->pir = pir;
+ assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
+}
+
+void pre_init_boot_cpu(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ memset(cpu, 0, sizeof(struct cpu_thread));
+}
+
+void init_boot_cpu(void)
+{
+ unsigned int i, pir, pvr;
+
+ pir = mfspr(SPR_PIR);
+ pvr = mfspr(SPR_PVR);
+
+ /* Get a CPU thread count and an initial max PIR based on PVR */
+ switch(PVR_TYPE(pvr)) {
+ case PVR_TYPE_P7:
+ case PVR_TYPE_P7P:
+ cpu_thread_count = 4;
+ cpu_max_pir = SPR_PIR_P7_MASK;
+ proc_gen = proc_gen_p7;
+ printf("CPU: P7 generation processor\n");
+ break;
+ case PVR_TYPE_P8E:
+ case PVR_TYPE_P8:
+ cpu_thread_count = 8;
+ cpu_max_pir = SPR_PIR_P8_MASK;
+ proc_gen = proc_gen_p8;
+ printf("CPU: P8 generation processor\n");
+ break;
+ default:
+ prerror("CPU: Unknown PVR, assuming 1 thread\n");
+ cpu_thread_count = 1;
+ cpu_max_pir = mfspr(SPR_PIR);
+ proc_gen = proc_gen_unknown;
+ }
+
+ printf("CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n", pir, pvr);
+ printf("CPU: Initial max PIR set to 0x%x\n", cpu_max_pir);
+ printf("CPU: Assuming max %d threads per core\n", cpu_thread_count);
+
+ /* Clear the CPU structs */
+ for (i = 0; i <= cpu_max_pir; i++)
+ memset(&cpu_stacks[i].cpu, 0, sizeof(struct cpu_thread));
+
+ /* Setup boot CPU state */
+ boot_cpu = &cpu_stacks[pir].cpu;
+ init_cpu_thread(boot_cpu, cpu_state_active, pir);
+ init_boot_tracebuf(boot_cpu);
+ assert(this_cpu() == boot_cpu);
+}
+
+void init_all_cpus(void)
+{
+ struct dt_node *cpus, *cpu;
+ unsigned int thread, new_max_pir = 0;
+
+ cpus = dt_find_by_path(dt_root, "/cpus");
+ assert(cpus);
+
+ /* Iterate all CPUs in the device-tree */
+ dt_for_each_child(cpus, cpu) {
+ unsigned int pir, server_no, chip_id;
+ enum cpu_thread_state state;
+ const struct dt_property *p;
+ struct cpu_thread *t, *pt;
+
+ /* Skip cache nodes */
+ if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
+ continue;
+
+ server_no = dt_prop_get_u32(cpu, "reg");
+
+ /* If PIR property is absent, assume it's the same as the
+ * server number
+ */
+ pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);
+
+ /* We should always have an ibm,chip-id property */
+ chip_id = dt_get_chip_id(cpu);
+
+ /* Only use operational CPUs */
+ if (!strcmp(dt_prop_get(cpu, "status"), "okay"))
+ state = cpu_state_present;
+ else
+ state = cpu_state_unavailable;
+
+ printf("CPU: CPU from DT PIR=0x%04x Server#=0x%x State=%d\n",
+ pir, server_no, state);
+
+ /* Setup thread 0 */
+ t = pt = &cpu_stacks[pir].cpu;
+ if (t != boot_cpu) {
+ init_cpu_thread(t, state, pir);
+ /* Each cpu gets its own later in init_trace_buffers */
+ t->trace = boot_cpu->trace;
+ }
+ t->server_no = server_no;
+ t->primary = t;
+ t->node = cpu;
+ t->chip_id = chip_id;
+ t->icp_regs = 0; /* Will be set later */
+
+ /* Add associativity properties */
+ add_core_associativity(t);
+
+ /* Adjust max PIR */
+ if (new_max_pir < (pir + cpu_thread_count - 1))
+ new_max_pir = pir + cpu_thread_count - 1;
+
+ /* Iterate threads */
+ p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
+ if (!p)
+ continue;
+ for (thread = 1; thread < (p->len / 4); thread++) {
+ printf("CPU: secondary thread %d found\n", thread);
+ t = &cpu_stacks[pir + thread].cpu;
+ init_cpu_thread(t, state, pir + thread);
+ t->trace = boot_cpu->trace;
+ t->server_no = ((const u32 *)p->prop)[thread];
+ t->is_secondary = true;
+ t->primary = pt;
+ t->node = cpu;
+ t->chip_id = chip_id;
+ }
+ }
+ cpu_max_pir = new_max_pir;
+ printf("CPU: New max PIR set to 0x%x\n", new_max_pir);
+}
+
+void cpu_bringup(void)
+{
+ struct cpu_thread *t;
+
+ printf("CPU: Setting up secondary CPU state\n");
+
+ op_display(OP_LOG, OP_MOD_CPU, 0x0000);
+
+ /* Tell everybody to chime in ! */
+ printf("CPU: Calling in all processors...\n");
+ cpu_secondary_start = 1;
+ sync();
+
+ op_display(OP_LOG, OP_MOD_CPU, 0x0002);
+
+ for_each_cpu(t) {
+ if (t->state != cpu_state_present &&
+ t->state != cpu_state_active)
+ continue;
+
+ /* Add a callin timeout ? If so, call cpu_remove_node(t). */
+ while (t->state != cpu_state_active) {
+ smt_very_low();
+ sync();
+ }
+ smt_medium();
+ }
+
+ printf("CPU: All processors called in...\n");
+
+ op_display(OP_LOG, OP_MOD_CPU, 0x0003);
+}
+
+void cpu_callin(struct cpu_thread *cpu)
+{
+ cpu->state = cpu_state_active;
+}
+
+static void opal_start_thread_job(void *data)
+{
+ cpu_give_self_os();
+
+ /* We do not return, so let's mark the job as
+ * complete
+ */
+ start_kernel_secondary((uint64_t)data);
+}
+
+static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address)
+{
+ struct cpu_thread *cpu;
+ struct cpu_job *job;
+
+ cpu = find_cpu_by_server(server_no);
+ if (!cpu) {
+ prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no);
+ return OPAL_PARAMETER;
+ }
+ printf("OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n",
+ server_no, cpu->pir, start_address);
+
+ lock(&reinit_lock);
+ if (!cpu_is_available(cpu)) {
+ unlock(&reinit_lock);
+ prerror("OPAL: CPU not active in OPAL !\n");
+ return OPAL_WRONG_STATE;
+ }
+ job = __cpu_queue_job(cpu, opal_start_thread_job, (void *)start_address,
+ true);
+ unlock(&reinit_lock);
+ if (!job) {
+ prerror("OPAL: Failed to create CPU start job !\n");
+ return OPAL_INTERNAL_ERROR;
+ }
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2);
+
+static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status)
+{
+ struct cpu_thread *cpu;
+
+ cpu = find_cpu_by_server(server_no);
+ if (!cpu) {
+ prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no);
+ return OPAL_PARAMETER;
+ }
+ if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) {
+ prerror("OPAL: CPU not active in OPAL nor OS !\n");
+ return OPAL_PARAMETER;
+ }
+ switch(cpu->state) {
+ case cpu_state_os:
+ *thread_status = OPAL_THREAD_STARTED;
+ break;
+ case cpu_state_active:
+ /* Active in skiboot -> inactive in OS */
+ *thread_status = OPAL_THREAD_INACTIVE;
+ break;
+ default:
+ *thread_status = OPAL_THREAD_UNAVAILABLE;
+ }
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2);
+
+static int64_t opal_return_cpu(void)
+{
+ printf("OPAL: Returning CPU 0x%04x\n", this_cpu()->pir);
+
+ __secondary_cpu_entry();
+
+ return OPAL_HARDWARE; /* Should not happen */
+}
+opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0);
+
+static void cpu_change_hile(void *hilep)
+{
+ bool hile = *(bool *)hilep;
+ unsigned long hid0;
+
+ hid0 = mfspr(SPR_HID0);
+ if (hile)
+ hid0 |= SPR_HID0_HILE;
+ else
+ hid0 &= ~SPR_HID0_HILE;
+ printf("CPU: [%08x] HID0 set to 0x%016lx\n", this_cpu()->pir, hid0);
+ set_hid0(hid0);
+
+ this_cpu()->current_hile = hile;
+}
+
+static int64_t cpu_change_all_hile(bool hile)
+{
+ struct cpu_thread *cpu;
+
+ printf("CPU: Switching HILE on all CPUs to %d\n", hile);
+
+ for_each_available_cpu(cpu) {
+ if (cpu->current_hile == hile)
+ continue;
+ if (cpu == this_cpu()) {
+ cpu_change_hile(&hile);
+ continue;
+ }
+ cpu_wait_job(cpu_queue_job(cpu, cpu_change_hile, &hile), true);
+ }
+ return OPAL_SUCCESS;
+}
+
+static int64_t opal_reinit_cpus(uint64_t flags)
+{
+ struct cpu_thread *cpu;
+ int64_t rc = OPAL_SUCCESS;
+ int i;
+
+ lock(&reinit_lock);
+
+ prerror("OPAL: Trying a CPU re-init with flags: 0x%llx\n", flags);
+
+ for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) {
+ if (cpu == this_cpu())
+ continue;
+ if (cpu->state == cpu_state_os) {
+ /*
+ * This might be a race with the CPU being returned to
+ * OPAL during kexec while its state still reads
+ * cpu_state_os, so wait a bit and try again
+ */
+ for (i = 0; (i < 3) && (cpu->state == cpu_state_os); i++)
+ time_wait_ms(1);
+ if (cpu->state == cpu_state_os) {
+ prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir);
+ rc = OPAL_WRONG_STATE;
+ goto bail;
+ }
+ }
+ }
+ /*
+ * Now we need to mark ourselves "active" or we'll be skipped
+ * by the various "for_each_active_..." calls done by slw_reinit()
+ */
+ this_cpu()->state = cpu_state_active;
+
+ /*
+ * If the flags affect endianness and we are on P8 DD2 or later, then
+ * use the HID bit. We use the PVR (we could use the EC level in
+ * the chip but the PVR is more readily available).
+ */
+ if (proc_gen == proc_gen_p8 && PVR_VERS_MAJ(mfspr(SPR_PVR)) >= 2 &&
+ (flags & (OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE))) {
+ bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE);
+
+ flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE);
+ rc = cpu_change_all_hile(hile);
+ }
+
+ /* Any flags left ? */
+ if (flags != 0)
+ rc = slw_reinit(flags);
+
+ /* And undo the above */
+ this_cpu()->state = cpu_state_os;
+
+bail:
+ unlock(&reinit_lock);
+ return rc;
+}
+opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);
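
A hypothetical caller of the job queue above, not part of the patch: queue do_work() on every other available CPU and wait for each to finish, the same pattern cpu_change_all_hile() uses. cpu_queue_job() is assumed to be the no_return=false wrapper around __cpu_queue_job() declared in cpu.h, as used above.

static void do_work(void *data)
{
	unsigned long *counter = data;

	(*counter)++;
}

static void run_on_all_cpus(void)
{
	struct cpu_thread *cpu;
	unsigned long count = 0;

	for_each_available_cpu(cpu) {
		if (cpu == this_cpu()) {
			do_work(&count);	/* run locally, no job needed */
			continue;
		}
		/* Queue on the remote CPU, wait for completion, free the job */
		cpu_wait_job(cpu_queue_job(cpu, do_work, &count), true);
	}
	printf("CPU: ran job on %lu threads\n", count);
}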
diff --git a/core/device.c b/core/device.c
new file mode 100644
index 0000000..28cccb7
--- /dev/null
+++ b/core/device.c
@@ -0,0 +1,791 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <device.h>
+#include <stdlib.h>
+#include <skiboot.h>
+#include <libfdt/libfdt.h>
+#include <libfdt/libfdt_internal.h>
+#include <ccan/str/str.h>
+#include <ccan/endian/endian.h>
+
+/* Used to give unique handles. */
+u32 last_phandle = 0;
+
+struct dt_node *dt_root;
+struct dt_node *dt_chosen;
+
+static const char *take_name(const char *name)
+{
+ if (!is_rodata(name) && !(name = strdup(name))) {
+ prerror("Failed to allocate copy of name");
+ abort();
+ }
+ return name;
+}
+
+static void free_name(const char *name)
+{
+ if (!is_rodata(name))
+ free((char *)name);
+}
+
+static struct dt_node *new_node(const char *name)
+{
+ struct dt_node *node = malloc(sizeof *node);
+ if (!node) {
+ prerror("Failed to allocate node\n");
+ abort();
+ }
+
+ node->name = take_name(name);
+ node->parent = NULL;
+ list_head_init(&node->properties);
+ list_head_init(&node->children);
+ /* FIXME: locking? */
+ node->phandle = ++last_phandle;
+ return node;
+}
+
+struct dt_node *dt_new_root(const char *name)
+{
+ return new_node(name);
+}
+
+bool dt_attach_root(struct dt_node *parent, struct dt_node *root)
+{
+ struct dt_node *node;
+
+ /* Look for duplicates */
+
+ assert(!root->parent);
+ dt_for_each_child(parent, node) {
+ if (!strcmp(node->name, root->name)) {
+ prerror("DT: %s failed, duplicate %s\n",
+ __func__, root->name);
+ return false;
+ }
+ }
+ list_add_tail(&parent->children, &root->list);
+ root->parent = parent;
+
+ return true;
+}
+
+struct dt_node *dt_new(struct dt_node *parent, const char *name)
+{
+ struct dt_node *new;
+ assert(parent);
+
+ new = new_node(name);
+ if (!dt_attach_root(parent, new)) {
+ free_name(new->name);
+ free(new);
+ return NULL;
+ }
+ return new;
+}
+
+struct dt_node *dt_new_addr(struct dt_node *parent, const char *name,
+ uint64_t addr)
+{
+ char *lname;
+ struct dt_node *new;
+ size_t len;
+
+ assert(parent);
+ len = strlen(name) + STR_MAX_CHARS(addr) + 2;
+ lname = malloc(len);
+ if (!lname)
+ return NULL;
+ snprintf(lname, len, "%s@%llx", name, (long long)addr);
+ new = new_node(lname);
+ free(lname);
+ if (!dt_attach_root(parent, new)) {
+ free_name(new->name);
+ free(new);
+ return NULL;
+ }
+ return new;
+}
+
+struct dt_node *dt_new_2addr(struct dt_node *parent, const char *name,
+ uint64_t addr0, uint64_t addr1)
+{
+ char *lname;
+ struct dt_node *new;
+ size_t len;
+ assert(parent);
+
+ len = strlen(name) + 2*STR_MAX_CHARS(addr0) + 3;
+ lname = malloc(len);
+ if (!lname)
+ return NULL;
+ snprintf(lname, len, "%s@%llx,%llx",
+ name, (long long)addr0, (long long)addr1);
+ new = new_node(lname);
+ free(lname);
+ if (!dt_attach_root(parent, new)) {
+ free_name(new->name);
+ free(new);
+ return NULL;
+ }
+ return new;
+}
+
+char *dt_get_path(const struct dt_node *node)
+{
+ unsigned int len = 0;
+ const struct dt_node *n;
+ char *path, *p;
+
+ /* Dealing with NULL is for test/debug purposes */
+ if (!node)
+ return strdup("<NULL>");
+
+ for (n = node; n; n = n->parent) {
+ len += strlen(n->name);
+ if (n->parent || n == node)
+ len++;
+ }
+ path = zalloc(len + 1);
+ assert(path);
+ p = path + len;
+ for (n = node; n; n = n->parent) {
+ len = strlen(n->name);
+ p -= len;
+ memcpy(p, n->name, len);
+ if (n->parent || n == node)
+ *(--p) = '/';
+ }
+ assert(p == path);
+
+ return p;
+}
+
+static const char *__dt_path_split(const char *p,
+ const char **namep, unsigned int *namel,
+ const char **addrp, unsigned int *addrl)
+{
+ const char *at, *sl;
+
+ *namel = *addrl = 0;
+
+ /* Skip initial '/' */
+ while (*p == '/')
+ p++;
+
+ /* Check empty path */
+ if (*p == 0)
+ return p;
+
+ at = strchr(p, '@');
+ sl = strchr(p, '/');
+ if (sl == NULL)
+ sl = p + strlen(p);
+ if (sl < at)
+ at = NULL;
+ if (at) {
+ *addrp = at + 1;
+ *addrl = sl - at - 1;
+ }
+ *namep = p;
+ *namel = at ? (at - p) : (sl - p);
+
+ return sl;
+}
+
+struct dt_node *dt_find_by_path(struct dt_node *root, const char *path)
+{
+ struct dt_node *n;
+ const char *pn, *pa, *p = path, *nn, *na;
+ unsigned int pnl, pal, nnl, nal;
+ bool match;
+
+ /* Walk path components */
+ while (*p) {
+ /* Extract next path component */
+ p = __dt_path_split(p, &pn, &pnl, &pa, &pal);
+ if (pnl == 0 && pal == 0)
+ break;
+
+ /* Compare with each child node */
+ match = false;
+ list_for_each(&root->children, n, list) {
+ match = true;
+ __dt_path_split(n->name, &nn, &nnl, &na, &nal);
+ if (pnl && (pnl != nnl || strncmp(pn, nn, pnl)))
+ match = false;
+ if (pal && (pal != nal || strncmp(pa, na, pal)))
+ match = false;
+ if (match) {
+ root = n;
+ break;
+ }
+ }
+
+ /* No child match */
+ if (!match)
+ return NULL;
+ }
+ return root;
+}
+
+struct dt_node *dt_find_by_phandle(struct dt_node *root, u32 phandle)
+{
+ struct dt_node *node;
+
+ dt_for_each_node(root, node)
+ if (node->phandle == phandle)
+ return node;
+ return NULL;
+}
+
+static struct dt_property *new_property(struct dt_node *node,
+ const char *name, size_t size)
+{
+ struct dt_property *p = malloc(sizeof(*p) + size);
+ if (!p) {
+ prerror("Failed to allocate property \"%s\" for %s of %zu bytes\n",
+ name, dt_get_path(node), size);
+ abort();
+ }
+ if (dt_find_property(node, name)) {
+ prerror("Duplicate property \"%s\" in node %s\n",
+ name, dt_get_path(node));
+ abort();
+
+ }
+
+ p->name = take_name(name);
+ p->len = size;
+ list_add_tail(&node->properties, &p->list);
+ return p;
+}
+
+struct dt_property *dt_add_property(struct dt_node *node,
+ const char *name,
+ const void *val, size_t size)
+{
+ struct dt_property *p;
+
+ /*
+ * Filter out phandle properties, we re-generate them
+ * when flattening
+ */
+ if (strcmp(name, "linux,phandle") == 0 ||
+ strcmp(name, "phandle") == 0) {
+ assert(size == 4);
+ node->phandle = *(const u32 *)val;
+ if (node->phandle >= last_phandle)
+ last_phandle = node->phandle;
+ return NULL;
+ }
+
+ p = new_property(node, name, size);
+ if (size)
+ memcpy(p->prop, val, size);
+ return p;
+}
+
+void dt_resize_property(struct dt_property **prop, size_t len)
+{
+ size_t new_len = sizeof(**prop) + len;
+
+ *prop = realloc(*prop, new_len);
+
+ /* Fix up linked lists in case we moved. (note: not an empty list). */
+ (*prop)->list.next->prev = &(*prop)->list;
+ (*prop)->list.prev->next = &(*prop)->list;
+}
+
+struct dt_property *dt_add_property_string(struct dt_node *node,
+ const char *name,
+ const char *value)
+{
+ return dt_add_property(node, name, value, strlen(value)+1);
+}
+
+struct dt_property *dt_add_property_nstr(struct dt_node *node,
+ const char *name,
+ const char *value, unsigned int vlen)
+{
+ struct dt_property *p;
+ char *tmp = zalloc(vlen + 1);
+
+ strncpy(tmp, value, vlen);
+ p = dt_add_property(node, name, tmp, strlen(tmp)+1);
+ free(tmp);
+
+ return p;
+}
+
+struct dt_property *__dt_add_property_cells(struct dt_node *node,
+ const char *name,
+ int count, ...)
+{
+ struct dt_property *p;
+ u32 *val;
+ unsigned int i;
+ va_list args;
+
+ p = new_property(node, name, count * sizeof(u32));
+ val = (u32 *)p->prop;
+ va_start(args, count);
+ for (i = 0; i < count; i++)
+ val[i] = cpu_to_fdt32(va_arg(args, u32));
+ va_end(args);
+ return p;
+}
+
+struct dt_property *__dt_add_property_u64s(struct dt_node *node,
+ const char *name,
+ int count, ...)
+{
+ struct dt_property *p;
+ u64 *val;
+ unsigned int i;
+ va_list args;
+
+ p = new_property(node, name, count * sizeof(u64));
+ val = (u64 *)p->prop;
+ va_start(args, count);
+ for (i = 0; i < count; i++)
+ val[i] = cpu_to_fdt64(va_arg(args, u64));
+ va_end(args);
+ return p;
+}
+
+struct dt_property *__dt_add_property_strings(struct dt_node *node,
+ const char *name,
+ int count, ...)
+{
+ struct dt_property *p;
+ unsigned int i, size;
+ va_list args;
+ const char *sstr;
+ char *s;
+
+ va_start(args, count);
+ for (i = size = 0; i < count; i++) {
+ sstr = va_arg(args, const char *);
+ if (sstr)
+ size += strlen(sstr) + 1;
+ }
+ va_end(args);
+ if (!size)
+ size = 1;
+ p = new_property(node, name, size);
+ s = (char *)p->prop;
+ *s = 0;
+ va_start(args, count);
+ for (i = 0; i < count; i++) {
+ sstr = va_arg(args, const char *);
+ if (sstr) {
+ strcpy(s, sstr);
+ s = s + strlen(sstr) + 1;
+ }
+ }
+ va_end(args);
+ return p;
+}
+
+void dt_del_property(struct dt_node *node, struct dt_property *prop)
+{
+ list_del_from(&node->properties, &prop->list);
+ free_name(prop->name);
+ free(prop);
+}
+
+u32 dt_property_get_cell(const struct dt_property *prop, u32 index)
+{
+ assert(prop->len >= (index+1)*sizeof(u32));
+ /* Always aligned, so this works. */
+ return fdt32_to_cpu(((const u32 *)prop->prop)[index]);
+}
+
+/* First child of this node. */
+struct dt_node *dt_first(const struct dt_node *root)
+{
+ return list_top(&root->children, struct dt_node, list);
+}
+
+/* Return next node, or NULL. */
+struct dt_node *dt_next(const struct dt_node *root,
+ const struct dt_node *prev)
+{
+ /* Children? */
+ if (!list_empty(&prev->children))
+ return dt_first(prev);
+
+ do {
+ /* More siblings? */
+ if (prev->list.next != &prev->parent->children.n)
+ return list_entry(prev->list.next, struct dt_node,list);
+
+ /* No more siblings, move up to parent. */
+ prev = prev->parent;
+ } while (prev != root);
+
+ return NULL;
+}
+
+struct dt_property *__dt_find_property(struct dt_node *node, const char *name)
+{
+ struct dt_property *i;
+
+ list_for_each(&node->properties, i, list)
+ if (strcmp(i->name, name) == 0)
+ return i;
+ return NULL;
+}
+
+const struct dt_property *dt_find_property(const struct dt_node *node,
+ const char *name)
+{
+ const struct dt_property *i;
+
+ list_for_each(&node->properties, i, list)
+ if (strcmp(i->name, name) == 0)
+ return i;
+ return NULL;
+}
+
+const struct dt_property *dt_require_property(const struct dt_node *node,
+ const char *name, int wanted_len)
+{
+ const struct dt_property *p = dt_find_property(node, name);
+
+ if (!p) {
+ const char *path = dt_get_path(node);
+
+ prerror("DT: Missing required property %s/%s\n",
+ path, name);
+ assert(false);
+ }
+ if (wanted_len >= 0 && p->len != wanted_len) {
+ const char *path = dt_get_path(node);
+
+ prerror("DT: Unexpected property length %s/%s\n",
+ path, name);
+ prerror("DT: Expected len: %d got len: %zu\n",
+ wanted_len, p->len);
+ assert(false);
+ }
+
+ return p;
+}
+
+bool dt_has_node_property(const struct dt_node *node,
+ const char *name, const char *val)
+{
+ const struct dt_property *p = dt_find_property(node, name);
+
+ if (!p)
+ return false;
+ if (!val)
+ return true;
+
+ return p->len == strlen(val) + 1 && memcmp(p->prop, val, p->len) == 0;
+}
+
+bool dt_prop_find_string(const struct dt_property *p, const char *s)
+{
+ const char *c, *end;
+
+ if (!p)
+ return false;
+ c = p->prop;
+ end = c + p->len;
+
+ while(c < end) {
+ if (!strcasecmp(s, c))
+ return true;
+ c += strlen(c) + 1;
+ }
+ return false;
+}
+
+bool dt_node_is_compatible(const struct dt_node *node, const char *compat)
+{
+ const struct dt_property *p = dt_find_property(node, "compatible");
+
+ return dt_prop_find_string(p, compat);
+}
+
+struct dt_node *dt_find_compatible_node(struct dt_node *root,
+ struct dt_node *prev,
+ const char *compat)
+{
+ struct dt_node *node;
+
+ node = prev ? dt_next(root, prev) : root;
+ for (; node; node = dt_next(root, node))
+ if (dt_node_is_compatible(node, compat))
+ return node;
+ return NULL;
+}
+
+u64 dt_prop_get_u64(const struct dt_node *node, const char *prop)
+{
+ const struct dt_property *p = dt_require_property(node, prop, 8);
+
+ return ((u64)dt_property_get_cell(p, 0) << 32)
+ | dt_property_get_cell(p, 1);
+}
+
+u64 dt_prop_get_u64_def(const struct dt_node *node, const char *prop, u64 def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ if (!p)
+ return def;
+
+ return ((u64)dt_property_get_cell(p, 0) << 32)
+ | dt_property_get_cell(p, 1);
+}
+
+u32 dt_prop_get_u32(const struct dt_node *node, const char *prop)
+{
+ const struct dt_property *p = dt_require_property(node, prop, 4);
+
+ return dt_property_get_cell(p, 0);
+}
+
+u32 dt_prop_get_u32_def(const struct dt_node *node, const char *prop, u32 def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ if (!p)
+ return def;
+
+ return dt_property_get_cell(p, 0);
+}
+
+const void *dt_prop_get(const struct dt_node *node, const char *prop)
+{
+ const struct dt_property *p = dt_require_property(node, prop, -1);
+
+ return p->prop;
+}
+
+const void *dt_prop_get_def(const struct dt_node *node, const char *prop,
+ void *def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ return p ? p->prop : def;
+}
+
+const void *dt_prop_get_def_size(const struct dt_node *node, const char *prop,
+ void *def, size_t *len)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+ *len = 0;
+ if (p)
+ *len = p->len;
+
+ return p ? p->prop : def;
+}
+
+u32 dt_prop_get_cell(const struct dt_node *node, const char *prop, u32 cell)
+{
+ const struct dt_property *p = dt_require_property(node, prop, -1);
+
+ return dt_property_get_cell(p, cell);
+}
+
+u32 dt_prop_get_cell_def(const struct dt_node *node, const char *prop,
+ u32 cell, u32 def)
+{
+ const struct dt_property *p = dt_find_property(node, prop);
+
+ if (!p)
+ return def;
+
+ return dt_property_get_cell(p, cell);
+}
+
+void dt_free(struct dt_node *node)
+{
+ struct dt_node *child;
+ struct dt_property *p;
+
+ while ((child = list_top(&node->children, struct dt_node, list)))
+ dt_free(child);
+
+ while ((p = list_pop(&node->properties, struct dt_property, list))) {
+ free_name(p->name);
+ free(p);
+ }
+
+ if (node->parent)
+ list_del_from(&node->parent->children, &node->list);
+ free_name(node->name);
+ free(node);
+}
+
+int dt_expand_node(struct dt_node *node, const void *fdt, int fdt_node)
+{
+ const struct fdt_property *prop;
+ int offset, nextoffset, err;
+ struct dt_node *child;
+ const char *name;
+ uint32_t tag;
+
+ if (((err = fdt_check_header(fdt)) != 0)
+ || ((err = _fdt_check_node_offset(fdt, fdt_node)) < 0)) {
+ prerror("FDT: Error %d parsing node 0x%x\n", err, fdt_node);
+ return -1;
+ }
+
+ nextoffset = err;
+ do {
+ offset = nextoffset;
+
+ tag = fdt_next_tag(fdt, offset, &nextoffset);
+ switch (tag) {
+ case FDT_PROP:
+ prop = _fdt_offset_ptr(fdt, offset);
+ name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
+ dt_add_property(node, name, prop->data,
+ fdt32_to_cpu(prop->len));
+ break;
+ case FDT_BEGIN_NODE:
+ name = fdt_get_name(fdt, offset, NULL);
+ child = dt_new_root(name);
+ assert(child);
+ nextoffset = dt_expand_node(child, fdt, offset);
+
+ /*
+ * This may fail in case of duplicate, keep it
+ * going for now, we may ultimately want to
+ * assert
+ */
+ (void)dt_attach_root(node, child);
+ break;
+ case FDT_END:
+ return -1;
+ }
+ } while (tag != FDT_END_NODE);
+
+ return nextoffset;
+}
+
+void dt_expand(const void *fdt)
+{
+ printf("FDT: Parsing fdt @%p\n", fdt);
+
+ dt_root = dt_new_root("");
+
+ dt_expand_node(dt_root, fdt, 0);
+}
+
+u64 dt_get_number(const void *pdata, unsigned int cells)
+{
+ const u32 *p = pdata;
+ u64 ret = 0;
+
+ while(cells--)
+ ret = (ret << 32) | be32_to_cpu(*(p++));
+ return ret;
+}
+
+u32 dt_n_address_cells(const struct dt_node *node)
+{
+ if (!node->parent)
+ return 0;
+ return dt_prop_get_u32_def(node->parent, "#address-cells", 2);
+}
+
+u32 dt_n_size_cells(const struct dt_node *node)
+{
+ if (!node->parent)
+ return 0;
+ return dt_prop_get_u32_def(node->parent, "#size-cells", 1);
+}
+
+u64 dt_get_address(const struct dt_node *node, unsigned int index,
+ u64 *out_size)
+{
+ const struct dt_property *p;
+ u32 na = dt_n_address_cells(node);
+ u32 ns = dt_n_size_cells(node);
+ u32 pos, n;
+
+ p = dt_require_property(node, "reg", -1);
+ n = (na + ns) * sizeof(u32);
+ pos = n * index;
+ assert((pos + n) <= p->len);
+ if (out_size)
+ *out_size = dt_get_number(p->prop + pos + na * sizeof(u32), ns);
+ return dt_get_number(p->prop + pos, na);
+}
+
+static u32 __dt_get_chip_id(const struct dt_node *node)
+{
+ const struct dt_property *prop;
+
+ for (; node; node = node->parent) {
+ prop = dt_find_property(node, "ibm,chip-id");
+ if (prop)
+ return dt_property_get_cell(prop, 0);
+ }
+ return 0xffffffff;
+}
+
+u32 dt_get_chip_id(const struct dt_node *node)
+{
+ u32 id = __dt_get_chip_id(node);
+ assert(id != 0xffffffff);
+ return id;
+}
+
+struct dt_node *dt_find_compatible_node_on_chip(struct dt_node *root,
+ struct dt_node *prev,
+ const char *compat,
+ uint32_t chip_id)
+{
+ struct dt_node *node;
+
+ node = prev ? dt_next(root, prev) : root;
+ for (; node; node = dt_next(root, node)) {
+ u32 cid = __dt_get_chip_id(node);
+ if (cid == chip_id &&
+ dt_node_is_compatible(node, compat))
+ return node;
+ }
+ return NULL;
+}
+
+unsigned int dt_count_addresses(const struct dt_node *node)
+{
+ const struct dt_property *p;
+ u32 na = dt_n_address_cells(node);
+ u32 ns = dt_n_size_cells(node);
+ u32 n;
+
+ p = dt_require_property(node, "reg", -1);
+ n = (na + ns) * sizeof(u32);
+ return p->len / n;
+}
+
+u64 dt_translate_address(const struct dt_node *node, unsigned int index,
+ u64 *out_size)
+{
+ /* XXX TODO */
+ return dt_get_address(node, index, out_size);
+}
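
A hypothetical usage sketch of the device-tree helpers above (node name, compatible string and values are invented): create a node with a unit address, attach a couple of properties, then look the node back up by path and decode its first reg entry. It assumes the default 2 address cells and 1 size cell that dt_n_address_cells()/dt_n_size_cells() fall back to when the parent carries no #address-cells/#size-cells.

static void dt_example(void)
{
	struct dt_node *n;

	n = dt_new_addr(dt_root, "example", 0x1000);	/* "/example@1000" */
	assert(n);
	dt_add_property_string(n, "compatible", "ibm,example");
	/* 2 address cells + 1 size cell, matching the defaults */
	dt_add_property_cells(n, "reg", 0, 0x1000, 0x100);

	n = dt_find_by_path(dt_root, "/example@1000");
	assert(n);
	printf("DT: found %s, reg base 0x%llx\n", dt_get_path(n),
	       (unsigned long long)dt_get_address(n, 0, NULL));
}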
diff --git a/core/exceptions.c b/core/exceptions.c
new file mode 100644
index 0000000..995ca92
--- /dev/null
+++ b/core/exceptions.c
@@ -0,0 +1,529 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <stack.h>
+#include <opal.h>
+#include <processor.h>
+#include <cpu.h>
+
+static uint64_t client_mc_address;
+
+extern uint8_t exc_primary_start;
+extern uint8_t exc_primary_end;
+
+extern uint32_t exc_primary_patch_branch;
+
+extern uint8_t exc_secondary_start;
+extern uint8_t exc_secondary_end;
+
+extern uint32_t exc_secondary_patch_stack;
+extern uint32_t exc_secondary_patch_mfsrr0;
+extern uint32_t exc_secondary_patch_mfsrr1;
+extern uint32_t exc_secondary_patch_type;
+extern uint32_t exc_secondary_patch_mtsrr0;
+extern uint32_t exc_secondary_patch_mtsrr1;
+extern uint32_t exc_secondary_patch_rfid;
+
+struct lock hmi_lock = LOCK_UNLOCKED;
+
+#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42))
+
+#define SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43,45))
+#define SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45))
+#define SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45))
+#define SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45))
+#define SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45))
+#define SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45))
+#define SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45))
+#define SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45))
+
+#define DSISR_MC_UE (PPC_BIT(48))
+#define DSISR_MC_UE_TABLEWALK (PPC_BIT(49))
+#define DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52))
+#define DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53))
+#define DSISR_MC_TLB_MULTIHIT_MFSLB (PPC_BIT(55))
+#define DSISR_MC_TLB_MULTIHIT (PPC_BIT(53) | PPC_BIT(55))
+#define DSISR_MC_SLB_MULTIHIT (PPC_BIT(56))
+#define DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57))
+
+static void mce_set_ierror(struct opal_machine_check_event *mce, uint64_t srr1)
+{
+ switch (SRR1_MC_IFETCH(srr1)) {
+ case SRR1_MC_IFETCH_SLB_PARITY:
+ mce->error_type = OpalMCE_ERROR_TYPE_SLB;
+ mce->u.slb_error.slb_error_type = OpalMCE_SLB_ERROR_PARITY;
+ break;
+
+ case SRR1_MC_IFETCH_SLB_MULTIHIT:
+ mce->error_type = OpalMCE_ERROR_TYPE_SLB;
+ mce->u.slb_error.slb_error_type = OpalMCE_SLB_ERROR_MULTIHIT;
+ break;
+
+ case SRR1_MC_IFETCH_SLB_BOTH:
+ mce->error_type = OpalMCE_ERROR_TYPE_SLB;
+ mce->u.slb_error.slb_error_type =
+ OpalMCE_SLB_ERROR_INDETERMINATE;
+ break;
+
+ case SRR1_MC_IFETCH_TLB_MULTIHIT:
+ mce->error_type = OpalMCE_ERROR_TYPE_TLB;
+ mce->u.tlb_error.tlb_error_type = OpalMCE_TLB_ERROR_MULTIHIT;
+ break;
+
+ case SRR1_MC_IFETCH_UE:
+ case SRR1_MC_IFETCH_UE_IFU_INTERNAL:
+ mce->error_type = OpalMCE_ERROR_TYPE_UE;
+ mce->u.ue_error.ue_error_type = OpalMCE_UE_ERROR_IFETCH;
+ break;
+
+ case SRR1_MC_IFETCH_UE_TLB_RELOAD:
+ mce->error_type = OpalMCE_ERROR_TYPE_UE;
+ mce->u.ue_error.ue_error_type =
+ OpalMCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH;
+ break;
+ }
+
+}
+
+static void mce_set_derror(struct opal_machine_check_event *mce, uint64_t dsisr)
+{
+ if (dsisr & DSISR_MC_UE) {
+ mce->error_type = OpalMCE_ERROR_TYPE_UE;
+ mce->u.ue_error.ue_error_type = OpalMCE_UE_ERROR_LOAD_STORE;
+
+ } else if (dsisr & DSISR_MC_UE_TABLEWALK) {
+ mce->error_type = OpalMCE_ERROR_TYPE_UE;
+ mce->u.ue_error.ue_error_type =
+ OpalMCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE;
+
+ } else if (dsisr & DSISR_MC_ERAT_MULTIHIT) {
+ mce->error_type = OpalMCE_ERROR_TYPE_ERAT;
+ mce->u.erat_error.erat_error_type =
+ OpalMCE_ERAT_ERROR_MULTIHIT;
+
+ } else if (dsisr & DSISR_MC_TLB_MULTIHIT) {
+ mce->error_type = OpalMCE_ERROR_TYPE_TLB;
+ mce->u.tlb_error.tlb_error_type =
+ OpalMCE_TLB_ERROR_MULTIHIT;
+
+ } else if (dsisr & DSISR_MC_SLB_MULTIHIT) {
+ mce->error_type = OpalMCE_ERROR_TYPE_SLB;
+ mce->u.slb_error.slb_error_type =
+ OpalMCE_SLB_ERROR_MULTIHIT;
+
+ } else if (dsisr & DSISR_MC_SLB_MULTIHIT_PARITY) {
+ mce->error_type = OpalMCE_ERROR_TYPE_SLB;
+ mce->u.slb_error.slb_error_type =
+ OpalMCE_SLB_ERROR_INDETERMINATE;
+ }
+}
+
+/* Called from head.S, thus no prototype */
+void handle_machine_check(struct stack_frame *stack);
+
+void handle_machine_check(struct stack_frame *stack)
+{
+ struct opal_machine_check_event *mce;
+ uint64_t srr1, addr;
+
+ mce = &this_cpu()->mc_event;
+
+ /* This will occur if we get another MC between the time that
+ * we re-set MSR_ME, and the OS clears this flag.
+ *
+ * The alternative is keeping MSR_ME cleared and letting the
+ * OS re-set it (after clearing the flag), but then we risk a
+ * checkstop, so an opal assert() is the better option.
+ */
+ assert(!mce->in_use);
+
+ mce->in_use = 1;
+
+ /* Populate generic machine check info */
+ mce->version = OpalMCE_V1;
+ mce->srr0 = stack->srr0;
+ mce->srr1 = stack->srr1;
+ mce->gpr3 = stack->gpr[3];
+
+ mce->initiator = OpalMCE_INITIATOR_CPU;
+ mce->disposition = OpalMCE_DISPOSITION_NOT_RECOVERED;
+ mce->severity = OpalMCE_SEV_ERROR_SYNC;
+
+ srr1 = stack->srr1;
+
+ /* Populate the mce error_type and type-specific error_type from either
+ * SRR1 or DSISR, depending whether this was a load/store or ifetch
+ * exception */
+ if (SRR1_MC_LOADSTORE(srr1)) {
+ mce_set_derror(mce, srr1);
+ addr = stack->srr0;
+ } else {
+ mce_set_ierror(mce, mfspr(SPR_DSISR));
+ addr = mfspr(SPR_DAR);
+ }
+
+ if (mce->error_type == OpalMCE_ERROR_TYPE_TLB) {
+ mce->u.tlb_error.effective_address_provided = true;
+ mce->u.tlb_error.effective_address = addr;
+
+ } else if (mce->error_type == OpalMCE_ERROR_TYPE_SLB) {
+ mce->u.slb_error.effective_address_provided = true;
+ mce->u.slb_error.effective_address = addr;
+
+ } else if (mce->error_type == OpalMCE_ERROR_TYPE_ERAT) {
+ mce->u.erat_error.effective_address_provided = true;
+ mce->u.erat_error.effective_address = addr;
+
+ } else if (mce->error_type == OpalMCE_ERROR_TYPE_UE) {
+ mce->u.ue_error.effective_address_provided = true;
+ mce->u.ue_error.effective_address = addr;
+ }
+
+ /* Setup stack to rfi into the OS' handler, with ME re-enabled. */
+ stack->gpr[3] = (uint64_t)mce;
+ stack->srr0 = client_mc_address;
+ stack->srr1 = mfmsr() | MSR_ME;
+}
+
+#define REG "%016llx"
+#define REGS_PER_LINE 4
+#define LAST_VOLATILE 13
+
+static void dump_regs(struct stack_frame *stack, uint64_t hmer)
+{
+ int i;
+ uint64_t tfmr;
+
+ if (hmer & SPR_HMER_MALFUNCTION_ALERT)
+ printf("HMI: malfunction Alert\n");
+ if (hmer & SPR_HMER_HYP_RESOURCE_ERR)
+ printf("HMI: Hypervisor resource error.\n");
+ if (hmer & SPR_HMER_TFAC_ERROR) {
+ tfmr = mfspr(SPR_TFMR);
+ printf("HMI: TFAC error: SPRN_TFMR = 0x%016llx\n", tfmr);
+ }
+ if (hmer & SPR_HMER_TFMR_PARITY_ERROR) {
+ tfmr = mfspr(SPR_TFMR);
+ printf("HMI: TFMR parity error: SPRN_TFMR = 0x%016llx\n", tfmr);
+ }
+ printf("TRAP: %04llx\n", stack->type);
+ printf("SRR0: "REG" SRR1: "REG"\n", stack->srr0, stack->srr1);
+ printf("CFAR: "REG" LR: "REG" CTR: "REG"\n",
+ stack->cfar, stack->lr, stack->ctr);
+ printf(" CR: %08x XER: %08x\n", stack->cr, stack->xer);
+
+ for (i = 0; i < 32; i++) {
+ if ((i % REGS_PER_LINE) == 0)
+ printf("\nGPR%02d: ", i);
+ printf(REG " ", stack->gpr[i]);
+ if (i == LAST_VOLATILE)
+ break;
+ }
+ printf("\n");
+}
+
+/*
+ * HMER register layout:
+ * +===+==========+============================+========+===================+
+ * |Bit|Name |Description |PowerKVM|Action |
+ * | | | |HMI | |
+ * | | | |enabled | |
+ * | | | |for this| |
+ * | | | |bit ? | |
+ * +===+==========+============================+========+===================+
+ * |0 |malfunctio|A processor core in the |Yes |Raise attn from |
+ * | |n_alert  |system has checkstopped | |sapphire resulting |
+ * | | |(failed recovery) and has | |xstop |
+ * | | |requested a CP Sparing | | |
+ * | | |to occur. This is | | |
+ * | | |broadcasted to every | | |
+ * | | |processor in the system | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |1 |Reserved |reserved |n/a | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |2 |proc_recv_|Processor recovery occurred |Yes |Log message and |
+ * | |done |error-bit in fir not masked | |continue working. |
+ * | | |(see bit 11) | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |3 |proc_recv_|Processor went through |Yes |Log message and |
+ * | |error_mask|recovery for an error which | |continue working. |
+ * | |ed |is actually masked for | | |
+ * | | |reporting | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |4 | |Timer facility experienced |Yes |Raise attn from |
+ * | |tfac_error|an error. | |sapphire resulting |
+ * | | |TB, DEC, HDEC, PURR or SPURR| |xstop |
+ * | | |may be corrupted (details in| | |
+ * | | |TFMR) | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |5 | |TFMR SPR itself is |Yes |Raise attn from |
+ * | |tfmr_parit|corrupted. | |sapphire resulting |
+ * | |y_error |Entire timing facility may | |xstop |
+ * | | |be compromised. | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |6 |ha_overflo|UPS (Uninterruptible Power |No |N/A |
+ * | |w_warning |System) Overflow indication | | |
+ * | | |indicating that the UPS | | |
+ * | | |DirtyAddrTable has | | |
+ * | | |reached a limit where it | | |
+ * | | |requires PHYP unload support| | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |7 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |8 |xscom_fail|An XSCOM operation caused by|No |We handle it by |
+ * | | |a cache inhibited load/store| |manually reading |
+ * | | |from this thread failed. A | |HMER register. |
+ * | | |trap register is | | |
+ * | | |available. | | |
+ * | | | | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |9 |xscom_done|An XSCOM operation caused by|No |We handle it by |
+ * | | |a cache inhibited load/store| |manually reading |
+ * | | |from this thread completed. | |HMER register. |
+ * | | |If hypervisor | | |
+ * | | |intends to use this bit, it | | |
+ * | | |is responsible for clearing | | |
+ * | | |it before performing the | | |
+ * | | |xscom operation. | | |
+ * | | |NOTE: this bit should always| | |
+ * | | |be masked in HMEER | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |10 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |11 |proc_recv_|Processor recovery occurred |y |Log message and |
+ * | |again |again before bit2 or bit3 | |continue working. |
+ * | | |was cleared | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |12-|reserved |was temperature sensor |n/a |n/a |
+ * |15 | |passed the critical point on| | |
+ * | | |the way up | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |16 | |SCOM has set a reserved FIR |No |n/a |
+ * | |scom_fir_h|bit to cause recovery | | |
+ * | |m | | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |17 |trig_fir_h|Debug trigger has set a |No |n/a |
+ * | |mi |reserved FIR bit to cause | | |
+ * | | |recovery | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |18 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |19 |reserved |reserved |n/a |n/a |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |20 |hyp_resour|A hypervisor resource error |y |Raise attn from |
+ * | |ce_err |occurred: data parity error | |sapphire resulting |
+ * | | |on, SPRC0:3; SPR_Modereg or | |xstop. |
+ * | | |HMEER. | | |
+ * | | |Note: this bit will cause a | | |
+ * | | |checkstop when (HV=1, PR=0 | | |
+ * | | |and EE=0) | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |21-| |if bit 8 is active, the |No |We handle it by |
+ * |23 |xscom_stat|reason will be detailed in | |Manually reading |
+ * | |us |these bits. see chapter 11.1| |HMER register. |
+ * | | |These bits are information | | |
+ * | | |only and always masked | | |
+ * | | |(mask = '0') | | |
+ * | | |If hypervisor intends to use| | |
+ * | | |this bit, it is responsible | | |
+ * | | |for clearing it before | | |
+ * | | |performing the xscom | | |
+ * | | |operation. | | |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |24-|Not |Not implemented |n/a |n/a |
+ * |63 |implemente| | | |
+ * | |d | | | |
+ * +---+----------+----------------------------+--------+-------------------+
+ *
+ * Above HMER bits can be enabled/disabled by modifying
+ * SPR_HMEER_HMI_ENABLE_MASK #define in include/processor.h
+ * If you modify support for any of the bits listed above, please make sure
+ * you change the above table to reflect that.
+ *
+ * NOTE: Per Dave Larson, never enable 8,9,21-23
+ */
+
+/* make compiler happy with a prototype */
+void handle_hmi(struct stack_frame *stack);
+
+void handle_hmi(struct stack_frame *stack)
+{
+ uint64_t hmer, orig_hmer;
+ bool assert = false;
+
+ orig_hmer = hmer = mfspr(SPR_HMER);
+ printf("HMI: Received HMI interrupt: HMER = 0x%016llx\n", hmer);
+ if (hmer & (SPR_HMER_PROC_RECV_DONE
+ | SPR_HMER_PROC_RECV_ERROR_MASKED)) {
+ hmer &= ~(SPR_HMER_PROC_RECV_DONE
+ | SPR_HMER_PROC_RECV_ERROR_MASKED);
+ printf("HMI: Processor recovery Done.\n");
+ }
+ if (hmer & SPR_HMER_PROC_RECV_AGAIN) {
+ hmer &= ~SPR_HMER_PROC_RECV_AGAIN;
+ printf("HMI: Processor recovery occurred again before"
+ "bit2 was cleared\n");
+ }
+ /* Assert if we see malfunction alert, we can not continue. */
+ if (hmer & SPR_HMER_MALFUNCTION_ALERT) {
+ hmer &= ~SPR_HMER_MALFUNCTION_ALERT;
+ assert = true;
+ }
+
+ /* Assert if we see Hypervisor resource error, we can not continue. */
+ if (hmer & SPR_HMER_HYP_RESOURCE_ERR) {
+ hmer &= ~SPR_HMER_HYP_RESOURCE_ERR;
+ assert = true;
+ }
+
+ /*
+ * Assert for now for all TOD errors. In future we need to decode
+ * TFMR and take corrective action wherever required.
+ */
+ if (hmer & (SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR)) {
+ hmer &= ~(SPR_HMER_TFAC_ERROR | SPR_HMER_TFMR_PARITY_ERROR);
+ assert = true;
+ }
+
+ /*
+ * HMER bits are sticky, once set to 1 they remain set to 1 until
+ * they are set to 0. Reset the error source bits to 0, otherwise
+ * we keep getting the HMI interrupt again and again.
+ */
+ mtspr(SPR_HMER, hmer);
+ if (!assert)
+ return;
+
+ /*
+ * Raise attn to crash.
+ *
+ * We get the HMI on all threads at the same time. Use a lock to
+ * keep the printf messages from getting jumbled up.
+ */
+ lock(&hmi_lock);
+ dump_regs(stack, orig_hmer);
+ /* Should we unlock? We are going down anyway. */
+ unlock(&hmi_lock);
+ assert(false);
+}
+
+/* Called from head.S, thus no prototype */
+void exception_entry(struct stack_frame *stack);
+
+void exception_entry(struct stack_frame *stack)
+{
+ switch(stack->type) {
+ case STACK_ENTRY_MCHECK:
+ handle_machine_check(stack);
+ break;
+ case STACK_ENTRY_HMI:
+ handle_hmi(stack);
+ /* XXX TODO : Implement HMI recovery */
+ break;
+ case STACK_ENTRY_SOFTPATCH:
+ /* XXX TODO : Implement softpatch ? */
+ break;
+ }
+}
+
+static int64_t patch_exception(uint64_t vector, uint64_t glue, bool hv)
+{
+ uint64_t iaddr;
+
+ /* Copy over primary exception handler */
+ memcpy((void *)vector, &exc_primary_start,
+ &exc_primary_end - &exc_primary_start);
+
+ /* Patch branch instruction in primary handler */
+ iaddr = vector + exc_primary_patch_branch;
+ *(uint32_t *)iaddr |= (glue - iaddr) & 0x03fffffc;
+
+ /* Copy over secondary exception handler */
+ memcpy((void *)glue, &exc_secondary_start,
+ &exc_secondary_end - &exc_secondary_start);
+
+ /* Patch-in the vector number */
+ *(uint32_t *)(glue + exc_secondary_patch_type) |= vector;
+
+ /*
+ * If machine check, patch GET_STACK to get to the MC stack
+ * instead of the normal stack.
+ *
+ * To simplify the arithmetic involved I make assumptions
+ * on the fact that the base of all CPU stacks is 64k aligned
+ * and that our stack size is < 32k, which means that the
+ * "addi" instruction used in GET_STACK() is always using a
+ * small (<32k) positive offset, which we can then easily
+ * fixup with a simple addition
+ */
+ BUILD_ASSERT(STACK_SIZE < 0x8000);
+ BUILD_ASSERT(!(CPU_STACKS_BASE & 0xffff));
+
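+ /*
+ * Illustration of the fixup below: the addi generated by GET_STACK()
+ * carries its offset in the low 16 bits of the instruction word, and
+ * by the assumptions above that offset is a small positive number, so
+ * simply adding MC_STACK_SIZE to the instruction word only bumps the
+ * displacement field.
+ */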
+ if (vector == 0x200) {
+ /*
+ * The addi we try to patch is the 3rd instruction
+ * of GET_STACK(). If you change the macro, you must
+ * update this code
+ */
+ iaddr = glue + exc_secondary_patch_stack + 8;
+ *(uint32_t *)iaddr += MC_STACK_SIZE;
+ }
+
+ /* Standard exception ? All done */
+ if (!hv)
+ goto flush;
+
+ /* HV exception, change the SRR's to HSRRs and rfid to hrfid
+ *
+ * The magic is that mfspr/mtspr of SRR can be turned into the
+ * equivalent HSRR version by OR'ing 0x4800. For rfid to hrfid
+ * we OR 0x200.
+ */
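+ /*
+ * For illustration: SRR0/SRR1 are SPRs 26/27 while HSRR0/HSRR1 are
+ * SPRs 314/315, so the numbers differ only in the upper five bits of
+ * the SPR field (by 9). mfspr/mtspr encode the SPR number with its
+ * two 5-bit halves swapped, which puts that upper half at bit 11 of
+ * the instruction word: 9 << 11 == 0x4800. Likewise rfid and hrfid
+ * differ only in their extended opcode (18 vs 274, i.e. by 256),
+ * which sits at bit 1: 256 << 1 == 0x200.
+ */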
+ *(uint32_t *)(glue + exc_secondary_patch_mfsrr0) |= 0x4800;
+ *(uint32_t *)(glue + exc_secondary_patch_mfsrr1) |= 0x4800;
+ *(uint32_t *)(glue + exc_secondary_patch_mtsrr0) |= 0x4800;
+ *(uint32_t *)(glue + exc_secondary_patch_mtsrr1) |= 0x4800;
+ *(uint32_t *)(glue + exc_secondary_patch_rfid) |= 0x200;
+
+ flush:
+ /* On P7 and later all we need is : */
+ sync_icache();
+
+ return OPAL_SUCCESS;
+}
+
+static int64_t opal_register_exc_handler(uint64_t opal_exception,
+ uint64_t handler_address,
+ uint64_t glue_cache_line)
+{
+ switch(opal_exception) {
+ case OPAL_MACHINE_CHECK_HANDLER:
+ client_mc_address = handler_address;
+ return patch_exception(0x200, glue_cache_line, false);
+ case OPAL_HYPERVISOR_MAINTENANCE_HANDLER:
+ return patch_exception(0xe60, glue_cache_line, true);
+#if 0 /* We let Linux handle softpatch */
+ case OPAL_SOFTPATCH_HANDLER:
+ return patch_exception(0x1500, glue_cache_line, true);
+#endif
+ default:
+ break;
+ }
+ return OPAL_PARAMETER;
+}
+opal_call(OPAL_REGISTER_OPAL_EXCEPTION_HANDLER, opal_register_exc_handler, 3);
+
diff --git a/core/fast-reboot.c b/core/fast-reboot.c
new file mode 100644
index 0000000..49b80b6
--- /dev/null
+++ b/core/fast-reboot.c
@@ -0,0 +1,346 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <fsp.h>
+#include <psi.h>
+#include <opal.h>
+#include <xscom.h>
+#include <interrupts.h>
+#include <cec.h>
+#include <timebase.h>
+#include <memory.h>
+#include <pci.h>
+#include <chip.h>
+
+/*
+ * To get control of all threads, we sreset them via XSCOM after
+ * patching the 0x100 vector. This will work as long as the target
+ * HRMOR is 0. If Linux ever uses HRMOR, we'll have to consider
+ * a more messy approach.
+ *
+ * The SCOM register we want is called "Core RAS Control" in the doc
+ * and EX0.EC.PC.TCTL_GENERATE#0.TCTL.DIRECT_CONTROLS in the SCOM list
+ *
+ * Bits in there change from CPU rev to CPU rev but the bit we care
+ * about, bit 60 "sreset_request" appears to have stuck to the same
+ * place in both P7 and P7+. The register also has the same SCOM
+ * address
+ */
+#define EX0_TCTL_DIRECT_CONTROLS0 0x08010400
+#define EX0_TCTL_DIRECT_CONTROLS1 0x08010440
+#define EX0_TCTL_DIRECT_CONTROLS2 0x08010480
+#define EX0_TCTL_DIRECT_CONTROLS3 0x080104c0
+#define TCTL_DC_SRESET_REQUEST PPC_BIT(60)
+
+/* Flag tested by the OPAL entry code */
+uint8_t reboot_in_progress;
+static struct cpu_thread *resettor, *resettee;
+
+static void flush_caches(void)
+{
+ uint64_t base = SKIBOOT_BASE;
+ uint64_t end = base + SKIBOOT_SIZE;
+
+ /* Not sure what the effect of sreset is on cores, so let's
+ * shoot a series of dcbf's on all cachelines that make up
+ * our core memory just in case...
+ */
+ while(base < end) {
+ asm volatile("dcbf 0,%0" : : "r" (base) : "memory");
+ base += 128;
+ }
+ sync();
+}
+
+static bool do_reset_core_p7(struct cpu_thread *cpu)
+{
+ uint32_t xscom_addr, chip;
+ uint64_t ctl;
+ int rc;
+
+ /* Add the Core# */
+ xscom_addr = EX0_TCTL_DIRECT_CONTROLS0;
+ xscom_addr |= ((cpu->pir >> 2) & 7) << 24;
+
+ chip = pir_to_chip_id(cpu->pir);
+
+ ctl = TCTL_DC_SRESET_REQUEST;
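+ /* The four writes below hit the DIRECT_CONTROLS register of each
+ * thread of the core: the per-thread copies are 0x40 apart (see
+ * EX0_TCTL_DIRECT_CONTROLS0..3 above).
+ */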
+ rc = xscom_write(chip, xscom_addr, ctl);
+ rc |= xscom_write(chip, xscom_addr + 0x40, ctl);
+ rc |= xscom_write(chip, xscom_addr + 0x80, ctl);
+ rc |= xscom_write(chip, xscom_addr + 0xc0, ctl);
+ if (rc) {
+ prerror("RESET: Error %d resetting CPU 0x%04x\n",
+ rc, cpu->pir);
+ return false;
+ }
+ return true;
+}
+
+static void fast_reset_p7(void)
+{
+ struct cpu_thread *cpu;
+
+ resettee = this_cpu();
+ resettor = NULL;
+
+ /* Pick up a candidate resettor. We do that before we flush
+ * the caches
+ */
+ for_each_cpu(cpu) {
+ /*
+ * Some threads might still be in skiboot.
+ *
+ * But because we deal with entire cores and we don't want
+ * to special case things, we are just going to reset them
+ * too, on the assumption that this is safe because they hold
+ * no locks. That can only be true if they have no jobs
+ * scheduled, which is hopefully the case.
+ */
+ if (cpu->state != cpu_state_os &&
+ cpu->state != cpu_state_active)
+ continue;
+
+ /*
+ * Only hit cores and only if they aren't on the same core
+ * as ourselves
+ */
+ if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) ||
+ cpu->pir & 0x3)
+ continue;
+
+ /* Pick up one of those guys as our "resettor". It will be
+ * in charge of resetting this CPU. We avoid resetting
+ * ourselves, not sure how well it would do with SCOM
+ */
+ resettor = cpu;
+ break;
+ }
+
+ if (!resettor) {
+ printf("RESET: Can't find a resettor !\n");
+ return;
+ }
+ printf("RESET: Resetting from 0x%04x, resettor 0x%04x\n",
+ this_cpu()->pir, resettor->pir);
+
+ printf("RESET: Flushing caches...\n");
+
+ /* Is that necessary ? */
+ flush_caches();
+
+ /* Reset everybody except self and except resettor */
+ for_each_cpu(cpu) {
+ if (cpu->state != cpu_state_os &&
+ cpu->state != cpu_state_active)
+ continue;
+ if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) ||
+ cpu->pir & 0x3)
+ continue;
+ if (cpu_get_thread0(cpu) == cpu_get_thread0(resettor))
+ continue;
+
+ printf("RESET: Resetting CPU 0x%04x...\n", cpu->pir);
+
+ if (!do_reset_core_p7(cpu))
+ return;
+ }
+
+ /* Reset the resettor last because it's going to kill me ! */
+ printf("RESET: Resetting CPU 0x%04x...\n", resettor->pir);
+ if (!do_reset_core_p7(resettor))
+ return;
+
+ /* Don't return */
+ for (;;)
+ ;
+}
+
+void fast_reset(void)
+{
+ uint32_t pvr = mfspr(SPR_PVR);
+ extern uint32_t fast_reset_patch_start;
+ extern uint32_t fast_reset_patch_end;
+ uint32_t *dst, *src;
+
+ printf("RESET: Fast reboot request !\n");
+
+ /* XXX We need a way to ensure that no other CPU is in skiboot
+ * holding locks (via the OPAL APIs) and if they are, we need
+ * for them to get out
+ */
+ reboot_in_progress = 1;
+ time_wait_ms(200);
+
+ /* Copy reset trampoline */
+ printf("RESET: Copying reset trampoline...\n");
+ src = &fast_reset_patch_start;
+ dst = (uint32_t *)0x100;
+ while(src < &fast_reset_patch_end)
+ *(dst++) = *(src++);
+ sync_icache();
+
+ switch(PVR_TYPE(pvr)) {
+ case PVR_TYPE_P7:
+ case PVR_TYPE_P7P:
+ fast_reset_p7();
+ }
+}
+
+static void cleanup_cpu_state(void)
+{
+ if (cpu_is_thread0(this_cpu())) {
+ cleanup_tlb();
+ init_shared_sprs();
+ }
+ init_replicated_sprs();
+ reset_cpu_icp();
+}
+
+#ifdef FAST_REBOOT_CLEARS_MEMORY
+static void fast_mem_clear(uint64_t start, uint64_t end)
+{
+ printf("MEMORY: Clearing %llx..%llx\n", start, end);
+
+ while(start < end) {
+ asm volatile("dcbz 0,%0" : : "r" (start) : "memory");
+ start += 128;
+ }
+}
+
+static void memory_reset(void)
+{
+ struct address_range *i;
+ uint64_t skistart = SKIBOOT_BASE;
+ uint64_t skiend = SKIBOOT_BASE + SKIBOOT_SIZE;
+
+ printf("MEMORY: Clearing ...\n");
+
+ list_for_each(&address_ranges, i, list) {
+ uint64_t start = cleanup_addr(i->arange->start);
+ uint64_t end = cleanup_addr(i->arange->end);
+
+ if (start >= skiend || end <= skistart)
+ fast_mem_clear(start, end);
+ else {
+ if (start < skistart)
+ fast_mem_clear(start, skistart);
+ if (end > skiend)
+ fast_mem_clear(skiend, end);
+ }
+ }
+}
+#endif /* FAST_REBOOT_CLEARS_MEMORY */
+
+/* Entry from asm after a fast reset */
+void fast_reboot(void);
+
+void fast_reboot(void)
+{
+ static volatile bool fast_boot_release;
+ struct cpu_thread *cpu;
+
+ printf("INIT: CPU PIR 0x%04x reset in\n", this_cpu()->pir);
+
+ /* If this CPU was chosen as the resettor, it must reset the
+ * resettee (the one that initiated the whole process)
+ */
+ if (this_cpu() == resettor)
+ do_reset_core_p7(resettee);
+
+ /* Are we the original boot CPU ? If not, we spin waiting
+ * for a release signal from the boot CPU, then we clean
+ * ourselves up and go process jobs.
+ */
+ if (this_cpu() != boot_cpu) {
+ this_cpu()->state = cpu_state_present;
+ while (!fast_boot_release) {
+ smt_very_low();
+ sync();
+ }
+ smt_medium();
+ cleanup_cpu_state();
+ __secondary_cpu_entry();
+ }
+
+ /* We are the original boot CPU, wait for secondaries to
+ * be captured
+ */
+ for_each_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+
+ /* XXX Add a callin timeout ? */
+ while (cpu->state != cpu_state_present) {
+ smt_very_low();
+ sync();
+ }
+ smt_medium();
+ }
+
+ printf("INIT: Releasing secondaries...\n");
+
+ /* Release everybody */
+ fast_boot_release = true;
+ sync();
+
+ /* Wait for them to respond */
+ for_each_cpu(cpu) {
+ if (cpu == this_cpu())
+ continue;
+
+ /* XXX Add a callin timeout ? */
+ while (cpu->state == cpu_state_present) {
+ smt_very_low();
+ sync();
+ }
+ }
+
+ printf("INIT: All done, resetting everything else...\n");
+
+ /* Clear release flag for next time */
+ fast_boot_release = false;
+ reboot_in_progress = 0;
+
+ /* Cleanup ourselves */
+ cleanup_cpu_state();
+
+ /* Set our state to active */
+ this_cpu()->state = cpu_state_active;
+
+ /* Poke the consoles (see comments in the code there) */
+ fsp_console_reset();
+
+ /* Reset/EOI the PSI interrupt */
+ psi_irq_reset();
+
+ /* Remove all PCI devices */
+ pci_reset();
+
+ /* Reset IO Hubs */
+ cec_reset();
+
+ /* Re-Initialize all discovered PCI slots */
+ pci_init_slots();
+
+ /* Clear memory */
+#ifdef FAST_REBOOT_CLEARS_MEMORY
+ memory_reset();
+#endif
+ load_and_boot_kernel(true);
+}
diff --git a/core/fdt.c b/core/fdt.c
new file mode 100644
index 0000000..62e60fc
--- /dev/null
+++ b/core/fdt.c
@@ -0,0 +1,208 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <stdarg.h>
+#include <libfdt.h>
+#include <device.h>
+#include <cpu.h>
+#include <memory.h>
+#include <opal.h>
+#include <interrupts.h>
+#include <fsp.h>
+#include <cec.h>
+#include <vpd.h>
+#include <ccan/str/str.h>
+
+static int fdt_error;
+static void *fdt;
+
+#undef DEBUG_FDT
+
+static void __save_err(int err, const char *str)
+{
+#ifdef DEBUG_FDT
+ printf("FDT: rc: %d from \"%s\"\n", err, str);
+#endif
+ if (err && !fdt_error) {
+ prerror("FDT: Error %d from \"%s\"\n", err, str);
+ fdt_error = err;
+ }
+}
+
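+/*
+ * save_err(x) expands to __save_err(x, "x"), so the first libfdt error
+ * we hit is latched together with the text of the call that caused it.
+ */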
+#define save_err(...) __save_err(__VA_ARGS__, #__VA_ARGS__)
+
+static void dt_property_cell(const char *name, u32 cell)
+{
+ save_err(fdt_property_cell(fdt, name, cell));
+}
+
+static void dt_begin_node(const char *name, uint32_t phandle)
+{
+ save_err(fdt_begin_node(fdt, name));
+
+ /*
+ * We add both the new style "phandle" and the legacy
+ * "linux,phandle" properties
+ */
+ dt_property_cell("linux,phandle", phandle);
+ dt_property_cell("phandle", phandle);
+}
+
+static void dt_property(const char *name, const void *val, size_t size)
+{
+ save_err(fdt_property(fdt, name, val, size));
+}
+
+static void dt_end_node(void)
+{
+ save_err(fdt_end_node(fdt));
+}
+
+static void dump_fdt(void)
+{
+#ifdef DEBUG_FDT
+ int i, off, depth, err;
+
+ printf("Device tree %u@%p\n", fdt_totalsize(fdt), fdt);
+
+ err = fdt_check_header(fdt);
+ if (err) {
+ prerror("fdt_check_header: %s\n", fdt_strerror(err));
+ return;
+ }
+ printf("fdt_check_header passed\n");
+
+ printf("fdt_num_mem_rsv = %u\n", fdt_num_mem_rsv(fdt));
+ for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+ u64 addr, size;
+
+ err = fdt_get_mem_rsv(fdt, i, &addr, &size);
+ if (err) {
+ printf(" ERR %s\n", fdt_strerror(err));
+ return;
+ }
+ printf(" mem_rsv[%i] = %lu@%#lx\n", i, (long)addr, (long)size);
+ }
+
+ for (off = fdt_next_node(fdt, 0, &depth);
+ off > 0;
+ off = fdt_next_node(fdt, off, &depth)) {
+ int len;
+ const char *name;
+
+ name = fdt_get_name(fdt, off, &len);
+ if (!name) {
+ prerror("fdt: offset %i no name!\n", off);
+ return;
+ }
+ printf("name: %s [%u]\n", name, off);
+ }
+#endif
+}
+
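+/*
+ * Recursively emit the properties and children of "root" into the fdt.
+ * The caller opens and closes the node for "root" itself; properties
+ * whose names start with DT_PRIVATE are skipped.
+ */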
+static void flatten_dt_node(const struct dt_node *root)
+{
+ const struct dt_node *i;
+ const struct dt_property *p;
+
+#ifdef DEBUG_FDT
+ printf("FDT: node: %s\n", root->name);
+#endif
+
+ list_for_each(&root->properties, p, list) {
+ if (strstarts(p->name, DT_PRIVATE))
+ continue;
+#ifdef DEBUG_FDT
+ printf("FDT: prop: %s size: %ld\n", p->name, p->len);
+#endif
+ dt_property(p->name, p->prop, p->len);
+ }
+
+ list_for_each(&root->children, i, list) {
+ dt_begin_node(i->name, i->phandle);
+ flatten_dt_node(i);
+ dt_end_node();
+ }
+}
+
+static void create_dtb_reservemap(const struct dt_node *root)
+{
+ uint64_t base, size;
+ const uint64_t *ranges;
+ const struct dt_property *prop;
+ int i;
+
+ /* Duplicate the reserved-ranges property into the fdt reservemap */
+ prop = dt_find_property(root, "reserved-ranges");
+ if (prop) {
+ ranges = (const void *)prop->prop;
+
+ for (i = 0; i < prop->len / (sizeof(uint64_t) * 2); i++) {
+ base = *(ranges++);
+ size = *(ranges++);
+ save_err(fdt_add_reservemap_entry(fdt, base, size));
+ }
+ }
+
+ save_err(fdt_finish_reservemap(fdt));
+}
+
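+/*
+ * Flatten the live device tree into a dtb. The final size isn't known
+ * up front, so we build into a buffer and double its size whenever
+ * fdt_finish() reports FDT_ERR_NOSPACE.
+ */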
+void *create_dtb(const struct dt_node *root)
+{
+ size_t len = DEVICE_TREE_MAX_SIZE;
+ uint32_t old_last_phandle = last_phandle;
+
+ do {
+ if (fdt)
+ free(fdt);
+ last_phandle = old_last_phandle;
+ fdt_error = 0;
+ fdt = malloc(len);
+ if (!fdt) {
+ prerror("dtb: could not malloc %lu\n", (long)len);
+ return NULL;
+ }
+
+ fdt_create(fdt, len);
+
+ create_dtb_reservemap(root);
+
+ /* Open root node */
+ dt_begin_node(root->name, root->phandle);
+
+ /* Flatten our live tree */
+ flatten_dt_node(root);
+
+ /* Close root node */
+ dt_end_node();
+
+ save_err(fdt_finish(fdt));
+
+ if (!fdt_error)
+ break;
+
+ len *= 2;
+ } while (fdt_error == -FDT_ERR_NOSPACE);
+
+ dump_fdt();
+
+ if (fdt_error) {
+ prerror("dtb: error %s\n", fdt_strerror(fdt_error));
+ return NULL;
+ }
+ return fdt;
+}
diff --git a/core/flash-nvram.c b/core/flash-nvram.c
new file mode 100644
index 0000000..7e261b1
--- /dev/null
+++ b/core/flash-nvram.c
@@ -0,0 +1,76 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <skiboot.h>
+#include <device.h>
+#include <console.h>
+#include <opal.h>
+#include <platform.h>
+#include <libflash/libflash.h>
+
+static struct flash_chip *fl_nv_chip;
+static uint32_t fl_nv_start, fl_nv_size;
+
+static int flash_nvram_info(uint32_t *total_size)
+{
+ if (!fl_nv_chip)
+ return OPAL_HARDWARE;
+ *total_size = fl_nv_size;
+ return OPAL_SUCCESS;
+}
+
+static int flash_nvram_start_read(void *dst, uint32_t src, uint32_t len)
+{
+ int rc;
+
+ if ((src + len) > fl_nv_size) {
+ prerror("FLASH_NVRAM: read out of bound (0x%x,0x%x)\n",
+ src, len);
+ return OPAL_PARAMETER;
+ }
+ rc = flash_read(fl_nv_chip, fl_nv_start + src, dst, len);
+ if (rc)
+ return rc;
+ nvram_read_complete(true);
+ return 0;
+}
+
+static int flash_nvram_write(uint32_t dst, void *src, uint32_t len)
+{
+ /* TODO: When we have async jobs for PRD, turn this into one */
+
+ if ((dst + len) > fl_nv_size) {
+ prerror("FLASH_NVRAM: write out of bound (0x%x,0x%x)\n",
+ dst, len);
+ return OPAL_PARAMETER;
+ }
+ return flash_smart_write(fl_nv_chip, fl_nv_start + dst, src, len);
+}
+
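+/* Register this flash window as the backend for the platform NVRAM hooks */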
+int flash_nvram_init(struct flash_chip *chip, uint32_t start, uint32_t size)
+{
+ fl_nv_chip = chip;
+ fl_nv_start = start;
+ fl_nv_size = size;
+
+ platform.nvram_info = flash_nvram_info;
+ platform.nvram_start_read = flash_nvram_start_read;
+ platform.nvram_write = flash_nvram_write;
+
+ return 0;
+}
+
diff --git a/core/hostservices.c b/core/hostservices.c
new file mode 100644
index 0000000..85e62e3
--- /dev/null
+++ b/core/hostservices.c
@@ -0,0 +1,826 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <lock.h>
+#include <device.h>
+#include <compiler.h>
+#include <hostservices.h>
+#include <mem_region.h>
+#include <xscom.h>
+#include <fsp.h>
+#include <chip.h>
+#include <console.h>
+#include <mem-map.h>
+#include <timebase.h>
+
+#define HOSTBOOT_RUNTIME_INTERFACE_VERSION 1
+
+struct host_interfaces {
+ /** Interface version. */
+ uint64_t interface_version;
+
+ /** Put a string to the console. */
+ void (*puts)(const char*);
+ /** Critical failure in runtime execution. */
+ void (*assert)(void);
+
+ /** OPTIONAL. Hint to environment that the page may be executed. */
+ int (*set_page_execute)(void*);
+
+ /** malloc */
+ void *(*malloc)(size_t);
+ /** free */
+ void (*free)(void*);
+ /** realloc */
+ void *(*realloc)(void*, size_t);
+
+ /** sendErrorLog
+ * @param[in] plid Platform Log identifier
+ * @param[in] data size in bytes
+ * @param[in] pointer to data
+ * @return 0 on success else error code
+ */
+ int (*send_error_log)(uint32_t,uint32_t,void *);
+
+ /** Scan communication read
+ * @param[in] chip_id (based on devtree defn)
+ * @param[in] address
+ * @param[in] pointer to 8-byte data buffer
+ * @return 0 on success else return code
+ */
+ int (*scom_read)(uint64_t, uint64_t, void*);
+
+ /** Scan communication write
+ * @param[in] chip_id (based on devtree defn)
+ * @param[in] address
+ * @param[in] pointer to 8-byte data buffer
+ * @return 0 on success else return code
+ */
+ int (*scom_write)(uint64_t, uint64_t, const void *);
+
+ /** lid_load
+ * Load a LID from PNOR, FSP, etc.
+ *
+ * @param[in] LID number.
+ * @param[out] Allocated buffer for LID.
+ * @param[out] Size of LID (in bytes).
+ *
+ * @return 0 on success, else RC.
+ */
+ int (*lid_load)(uint32_t lid, void **buf, size_t *len);
+
+ /** lid_unload
+ * Release memory from previously loaded LID.
+ *
+ * @param[in] Allocated buffer for LID to release.
+ *
+ * @return 0 on success, else RC.
+ */
+ int (*lid_unload)(void *buf);
+
+ /** Get the address of a reserved memory region by its devtree name.
+ *
+ * @param[in] Devtree name (ex. "ibm,hbrt-vpd-image")
+ * @return physical address of region (or NULL).
+ **/
+ uint64_t (*get_reserved_mem)(const char*);
+
+ /**
+ * @brief Force a core to be awake, or clear the force
+ * @param[in] i_core Core to wake up (pid)
+ * @param[in] i_mode 0=force awake
+ * 1=clear force
+ * 2=clear all previous forces
+ * @return rc non-zero on error
+ */
+ int (*wakeup)( uint32_t i_core, uint32_t i_mode );
+
+ /**
+ * @brief Delay/sleep for at least the time given
+ * @param[in] seconds
+ * @param[in] nano seconds
+ */
+ void (*nanosleep)(uint64_t i_seconds, uint64_t i_nano_seconds);
+
+ // Reserve some space for future growth.
+ void (*reserved[32])(void);
+};
+
+struct runtime_interfaces {
+ /** Interface version. */
+ uint64_t interface_version;
+
+ /** Execute CxxTests that may be contained in the image.
+ *
+ * @param[in] - Pointer to CxxTestStats structure for results reporting.
+ */
+ void (*cxxtestExecute)(void *);
+ /** Get a list of lids numbers of the lids known to HostBoot
+ *
+ * @param[out] o_num - the number of lids in the list
+ * @return a pointer to the list
+ */
+ const uint32_t * (*get_lid_list)(size_t * o_num);
+
+ /** Load OCC Image and common data into mainstore, also setup OCC BARSs
+ *
+ * @param[in] i_homer_addr_phys - The physical mainstore address of the
+ * start of the HOMER image
+ * @param[in] i_homer_addr_va - Virtual memory address of the HOMER image
+ * @param[in] i_common_addr_phys - The physical mainstore address of the
+ * OCC common area.
+ * @param[in] i_common_addr_va - Virtual memory address of the common area
+ * @param[in] i_chip - The HW chip id (XSCOM chip ID)
+ * @return 0 on success else return code
+ */
+ int(*loadOCC)(uint64_t i_homer_addr_phys,
+ uint64_t i_homer_addr_va,
+ uint64_t i_common_addr_phys,
+ uint64_t i_common_addr_va,
+ uint64_t i_chip);
+
+ /** Start OCC on all chips, by module
+ *
+ * @param[in] i_chip - Array of functional HW chip ids
+ * @Note The caller must include a complete module's worth of chips
+ * @param[in] i_num_chips - Number of chips in the array
+ * @return 0 on success else return code
+ */
+ int (*startOCCs)(uint64_t* i_chip,
+ size_t i_num_chips);
+
+ /** Stop OCC hold OCCs in reset
+ *
+ * @param[in] i_chip - Array of functional HW chip ids
+ * @Note The caller must include a complete module's worth of chips
+ * @param[in] i_num_chips - Number of chips in the array
+ * @return 0 on success else return code
+ */
+ int (*stopOCCs)(uint64_t* i_chip,
+ size_t i_num_chips);
+
+ /* Reserve some space for future growth. */
+ void (*reserved[32])(void);
+};
+
+static struct runtime_interfaces *hservice_runtime;
+
+static char *hbrt_con_buf = (char *)HBRT_CON_START;
+static size_t hbrt_con_pos;
+static bool hbrt_con_wrapped;
+
+#define HBRT_CON_IN_LEN 0
+#define HBRT_CON_OUT_LEN (HBRT_CON_LEN - HBRT_CON_IN_LEN)
+
+struct memcons hbrt_memcons __section(".data.memcons") = {
+ .magic = MEMCONS_MAGIC,
+ .obuf_phys = HBRT_CON_START,
+ .ibuf_phys = HBRT_CON_START + HBRT_CON_OUT_LEN,
+ .obuf_size = HBRT_CON_OUT_LEN,
+ .ibuf_size = HBRT_CON_IN_LEN,
+};
+
+static void hservice_putc(char c)
+{
+ uint32_t opos;
+
+ hbrt_con_buf[hbrt_con_pos++] = c;
+ if (hbrt_con_pos >= HBRT_CON_OUT_LEN) {
+ hbrt_con_pos = 0;
+ hbrt_con_wrapped = true;
+ }
+
+ /*
+ * We must always re-generate memcons.out_pos because
+ * under some circumstances, the console script will
+ * use a broken putmemproc that does RMW on the full
+ * 8 bytes containing out_pos and in_prod, thus corrupting
+ * out_pos
+ */
+ opos = hbrt_con_pos;
+ if (hbrt_con_wrapped)
+ opos |= MEMCONS_OUT_POS_WRAP;
+ lwsync();
+ hbrt_memcons.out_pos = opos;
+}
+
+static void hservice_puts(const char *str)
+{
+ char c;
+
+ while((c = *(str++)) != 0)
+ hservice_putc(c);
+ hservice_putc(10);
+}
+
+static void hservice_mark(void)
+{
+ hservice_puts("--------------------------------------------------"
+ "--------------------------------------------------\n");
+}
+
+static void hservice_assert(void)
+{
+ prerror("HBRT: Assertion from hostservices\n");
+ abort();
+}
+
+static void *hservice_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static void hservice_free(void *ptr)
+{
+ free(ptr);
+}
+
+
+static void *hservice_realloc(void *ptr, size_t size)
+{
+ return realloc(ptr, size);
+}
+
+struct hbrt_elog_ent {
+ void *buf;
+ unsigned int size;
+ unsigned int plid;
+ struct list_node link;
+};
+static LIST_HEAD(hbrt_elogs);
+static struct lock hbrt_elog_lock = LOCK_UNLOCKED;
+static bool hbrt_elog_sending;
+static void hservice_start_elog_send(void);
+
+static void hservice_elog_write_complete(struct fsp_msg *msg)
+{
+ struct hbrt_elog_ent *ent = msg->user_data;
+
+ lock(&hbrt_elog_lock);
+ printf("HBRT: Completed send of PLID 0x%08x\n", ent->plid);
+ hbrt_elog_sending = false;
+ fsp_tce_unmap(PSI_DMA_HBRT_LOG_WRITE_BUF,
+ PSI_DMA_HBRT_LOG_WRITE_BUF_SZ);
+ free(ent->buf);
+ free(ent);
+ fsp_freemsg(msg);
+ hservice_start_elog_send();
+ unlock(&hbrt_elog_lock);
+}
+
+static void hservice_start_elog_send(void)
+{
+ struct fsp_msg *msg;
+ struct hbrt_elog_ent *ent;
+
+ again:
+ if (list_empty(&hbrt_elogs))
+ return;
+ ent = list_pop(&hbrt_elogs, struct hbrt_elog_ent, link);
+
+ hbrt_elog_sending = true;
+
+ printf("HBRT: Starting send of PLID 0x%08x\n", ent->plid);
+
+ fsp_tce_map(PSI_DMA_HBRT_LOG_WRITE_BUF, ent->buf,
+ PSI_DMA_HBRT_LOG_WRITE_BUF_SZ);
+
+ msg = fsp_mkmsg(FSP_CMD_WRITE_SP_DATA, 6, FSP_DATASET_HBRT_BLOB,
+ 0, 0, 0, PSI_DMA_HBRT_LOG_WRITE_BUF,
+ ent->size);
+
+ if (!msg) {
+ prerror("HBRT: Failed to create error msg log to FSP\n");
+ goto error;
+ }
+ msg->user_data = ent;
+ if (!fsp_queue_msg(msg, hservice_elog_write_complete))
+ return;
+ prerror("FSP: Error queueing elog update\n");
+ error:
+ if (msg)
+ fsp_freemsg(msg);
+ fsp_tce_unmap(PSI_DMA_HBRT_LOG_WRITE_BUF,
+ PSI_DMA_HBRT_LOG_WRITE_BUF_SZ);
+ free(ent->buf);
+ free(ent);
+ hbrt_elog_sending = false;
+ goto again;
+}
+
+static int hservice_send_error_log(uint32_t plid, uint32_t dsize, void *data)
+{
+ struct hbrt_elog_ent *ent;
+ void *abuf;
+
+ printf("HBRT: Error log generated with plid 0x%08x\n", plid);
+
+ /* We only know how to send error logs to FSP */
+ if (!fsp_present()) {
+ prerror("HBRT: Warning, error log from HBRT discarded !\n");
+ return OPAL_UNSUPPORTED;
+ }
+ if (dsize > PSI_DMA_HBRT_LOG_WRITE_BUF_SZ) {
+ prerror("HBRT: Warning, error log from HBRT too big (%d) !\n",
+ dsize);
+ dsize = PSI_DMA_HBRT_LOG_WRITE_BUF_SZ;
+ }
+
+ lock(&hbrt_elog_lock);
+
+ /* Create and populate a tracking structure */
+ ent = zalloc(sizeof(struct hbrt_elog_ent));
+ if (!ent) {
+ unlock(&hbrt_elog_lock);
+ return OPAL_NO_MEM;
+ }
+
+ /* Grab a 4k aligned page */
+ abuf = memalign(0x1000, PSI_DMA_HBRT_LOG_WRITE_BUF_SZ);
+ if (!abuf) {
+ free(ent);
+ unlock(&hbrt_elog_lock);
+ return OPAL_NO_MEM;
+ }
+ memset(abuf, 0, PSI_DMA_HBRT_LOG_WRITE_BUF_SZ);
+ memcpy(abuf, data, dsize);
+ ent->buf = abuf;
+ ent->size = dsize;
+ ent->plid = plid;
+ list_add_tail(&hbrt_elogs, &ent->link);
+ if (!hbrt_elog_sending)
+ hservice_start_elog_send();
+ unlock(&hbrt_elog_lock);
+
+ return 0;
+}
+
+static int hservice_scom_read(uint64_t chip_id, uint64_t addr, void *buf)
+{
+ return xscom_read(chip_id, addr, buf);
+}
+
+static int hservice_scom_write(uint64_t chip_id, uint64_t addr,
+ const void *buf)
+{
+ uint64_t val;
+
+ memcpy(&val, buf, sizeof(val));
+ return xscom_write(chip_id, addr, val);
+}
+
+static int hservice_lid_load(uint32_t lid, void **buf, size_t *len)
+{
+ int rc;
+ static void *lid_cache;
+ static size_t lid_cache_len;
+ static uint32_t lid_cache_id;
+
+ printf("HBRT: LID load request for 0x%08x\n", lid);
+
+ /* Adjust LID side first or we get a cache mismatch */
+ lid = fsp_adjust_lid_side(lid);
+
+ /* Check for cache */
+ if (lid_cache && lid_cache_id == lid) {
+ *buf = lid_cache;
+ *len = lid_cache_len;
+ printf("HBRT: Serviced from cache, len=0x%lx\n", lid_cache_len);
+ return 0;
+ }
+
+ /* Cache mismatch, discard old one */
+ if (lid_cache) {
+ printf("HBRT: Cache mismatch, discarding old 0x%08x\n",
+ lid_cache_id);
+ free(lid_cache);
+ lid_cache = NULL;
+ }
+
+ /* Allocate a new buffer and load the LID into it */
+ *buf = malloc(HBRT_LOAD_LID_SIZE);
+ *len = HBRT_LOAD_LID_SIZE;
+ rc = fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid, 0, *buf, len);
+ if (rc != 0)
+ /* Take advantage of realloc corner case here. */
+ *len = 0;
+ *buf = realloc(*buf, *len);
+
+ /* We managed, let's cache it */
+ if (rc == 0 && *len) {
+ lid_cache = *buf;
+ lid_cache_len = *len;
+ lid_cache_id = lid;
+
+ printf("HBRT: LID 0x%08x successfully loaded and cached"
+ ", len=0x%lx\n", lid, lid_cache_len);
+ }
+
+ return rc;
+}
+
+static int hservice_lid_unload(void *buf __unused)
+{
+ /* We do nothing as the LID is held in cache */
+ return 0;
+}
+
+static uint64_t hservice_get_reserved_mem(const char *name)
+{
+ struct mem_region *region;
+ uint64_t ret;
+
+ /* We assume it doesn't change after we've unlocked it, but
+ * lock ensures list is safe to walk. */
+ lock(&mem_region_lock);
+ region = find_mem_region(name);
+ ret = region ? region->start : 0;
+ unlock(&mem_region_lock);
+
+ if (!ret)
+ prerror("HBRT: Mem region '%s' not found !\n", name);
+
+ return ret;
+}
+
+static void hservice_nanosleep(uint64_t i_seconds, uint64_t i_nano_seconds)
+{
+ struct timespec ts;
+
+ ts.tv_sec = i_seconds;
+ ts.tv_nsec = i_nano_seconds;
+ nanosleep(&ts, NULL);
+}
+
+static int hservice_set_special_wakeup(struct cpu_thread *cpu)
+{
+ uint64_t val, core_id, poll_target, stamp;
+ int rc;
+
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
+
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
+
+ /*
+ * The original HWp reads the XSCOM first but ignores the result
+ * and error, let's do the same until I know for sure that is
+ * not necessary
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP),
+ PPC_BIT(0));
+ if (rc) {
+ prerror("HBRT: XSCOM error %d asserting special"
+ " wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+
+ /*
+ * HWP uses the history for Perf register here, dunno why it uses
+ * that one instead of the pHyp one, maybe to avoid clobbering it...
+ *
+ * In any case, it does that to check for run/nap vs. sleep/winkle/other
+ * to decide whether to poll on checkstop or not. Since we don't deal
+ * with checkstop conditions here, we ignore that part.
+ */
+
+ /*
+ * Now poll for completion of special wakeup. The HWP is nasty here,
+ * it will poll at 5ms intervals for up to 200ms. This is not quite
+ * acceptable for us at runtime, at least not until we have the
+ * ability to "context switch" HBRT. In practice, because we don't
+ * winkle, it will never take that long, so we increase the polling
+ * frequency to 1us per poll. However we do have to keep the same
+ * timeout.
+ *
+ * We don't use time_wait_ms() either for now as we don't want to
+ * poll the FSP here.
+ */
+ stamp = mftb();
+ poll_target = stamp + msecs_to_tb(200);
+ val = 0;
+ while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) {
+ /* Wait 1 us */
+ hservice_nanosleep(0, 1000);
+
+ /* Read PM state */
+ rc = xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0),
+ &val);
+ if (rc) {
+ prerror("HBRT: XSCOM error %d reading PM state on"
+ " 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+ /* Check timeout */
+ if (mftb() > poll_target)
+ break;
+ }
+
+ /* Success ? */
+ if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) {
+ uint64_t now = mftb();
+ printf("HBRT: Special wakeup complete after %ld us\n",
+ tb_to_usecs(now - stamp));
+ return 0;
+ }
+
+ /*
+ * We timed out ...
+ *
+ * HWP has a complex workaround for HW255321 which affects
+ * Murano DD1 and Venice DD1. Ignore that for now
+ *
+ * Instead we just dump some XSCOMs for error logging
+ */
+ prerror("HBRT: Timeout on special wakeup of 0x%0x\n", cpu->pir);
+ prerror("HBRT: PM0 = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+ prerror("HBRT: SPC_WKUP = 0x%016llx\n", val);
+ val = -1;
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_IDLE_STATE_HISTORY_PHYP),
+ &val);
+ prerror("HBRT: HISTORY = 0x%016llx\n", val);
+
+ return OPAL_HARDWARE;
+}
+
+static int hservice_clr_special_wakeup(struct cpu_thread *cpu)
+{
+ uint64_t val, core_id;
+ int rc;
+
+ /*
+ * Note: HWP checks for checkstops, but I assume we don't need to
+ * as we wouldn't be running if one was present
+ */
+
+ /* Grab core ID once */
+ core_id = pir_to_core_id(cpu->pir);
+
+ /*
+ * The original HWp reads the XSCOM first but ignores the result
+ * and error, let's do the same until I know for sure that is
+ * not necessary
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ /* Then we write special wakeup */
+ rc = xscom_write(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id,
+ EX_PM_SPECIAL_WAKEUP_PHYP), 0);
+ if (rc) {
+ prerror("HBRT: XSCOM error %d deasserting"
+ " special wakeup on 0x%x\n", rc, cpu->pir);
+ return rc;
+ }
+
+ /*
+ * The original HWp reads the XSCOM again with the comment
+ * "This puts an inherent delay in the propagation of the reset
+ * transition"
+ */
+ xscom_read(cpu->chip_id,
+ XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+ &val);
+
+ return 0;
+}
+
+static int hservice_wakeup(uint32_t i_core, uint32_t i_mode)
+{
+ struct cpu_thread *cpu;
+ int rc = OPAL_SUCCESS;
+
+ /*
+ * Mask out the top nibble of i_core since it may contain
+ * 0x4 (which we use for XSCOM targeting)
+ */
+ i_core &= 0x0fffffff;
+
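+ /* On P8 the low 3 bits of the PIR are the thread number, so
+ * "i_core << 3" below is thread 0 of the requested core.
+ */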
+ /* What do we need to do ? */
+ switch(i_mode) {
+ case 0: /* Assert special wakeup */
+ /* XXX Assume P8 */
+ cpu = find_cpu_by_pir(i_core << 3);
+ if (!cpu)
+ return OPAL_PARAMETER;
+ printf("HBRT: Special wakeup assert for core 0x%x, count=%d\n",
+ i_core, cpu->hbrt_spec_wakeup);
+ if (cpu->hbrt_spec_wakeup == 0)
+ rc = hservice_set_special_wakeup(cpu);
+ if (rc == 0)
+ cpu->hbrt_spec_wakeup++;
+ return rc;
+ case 1: /* Deassert special wakeup */
+ /* XXX Assume P8 */
+ cpu = find_cpu_by_pir(i_core << 3);
+ if (!cpu)
+ return OPAL_PARAMETER;
+ printf("HBRT: Special wakeup release for core 0x%x, count=%d\n",
+ i_core, cpu->hbrt_spec_wakeup);
+ if (cpu->hbrt_spec_wakeup == 0) {
+ prerror("HBRT: Special wakeup clear"
+ " on core 0x%x with count=0\n",
+ i_core);
+ return OPAL_WRONG_STATE;
+ }
+ /* What to do with count on errors ? */
+ cpu->hbrt_spec_wakeup--;
+ if (cpu->hbrt_spec_wakeup == 0)
+ rc = hservice_clr_special_wakeup(cpu);
+ return rc;
+ case 2: /* Clear all special wakeups */
+ printf("HBRT: Special wakeup release for all cores\n");
+ for_each_cpu(cpu) {
+ if (cpu->hbrt_spec_wakeup) {
+ cpu->hbrt_spec_wakeup = 0;
+ /* What to do on errors ? */
+ hservice_clr_special_wakeup(cpu);
+ }
+ }
+ return OPAL_SUCCESS;
+ default:
+ return OPAL_PARAMETER;
+ }
+}
+
+static struct host_interfaces hinterface = {
+ .interface_version = HOSTBOOT_RUNTIME_INTERFACE_VERSION,
+ .puts = hservice_puts,
+ .assert = hservice_assert,
+ .malloc = hservice_malloc,
+ .free = hservice_free,
+ .realloc = hservice_realloc,
+ .send_error_log = hservice_send_error_log,
+ .scom_read = hservice_scom_read,
+ .scom_write = hservice_scom_write,
+ .lid_load = hservice_lid_load,
+ .lid_unload = hservice_lid_unload,
+ .get_reserved_mem = hservice_get_reserved_mem,
+ .wakeup = hservice_wakeup,
+ .nanosleep = hservice_nanosleep,
+};
+
+int host_services_occ_load(void)
+{
+ struct proc_chip *chip;
+ int rc = 0;
+
+ printf("HBRT: OCC Load requested\n");
+
+ if (!(hservice_runtime && hservice_runtime->loadOCC)) {
+ prerror("HBRT: No hservice_runtime->loadOCC\n");
+ return -ENOENT;
+ }
+
+ for_each_chip(chip) {
+
+ printf("HBRT: [%16lx] Calling loadOCC() homer %016llx, occ_common_area %016llx, "
+ "chip %04x\n",
+ mftb(),
+ chip->homer_base,
+ chip->occ_common_base,
+ chip->id);
+
+ rc = hservice_runtime->loadOCC(chip->homer_base,
+ chip->homer_base,
+ chip->occ_common_base,
+ chip->occ_common_base,
+ chip->id);
+
+ hservice_mark();
+ printf("HBRT: [%16lx] -> rc = %d\n", mftb(), rc);
+ }
+ return rc;
+}
+
+int host_services_occ_start(void)
+{
+ struct proc_chip *chip;
+ int i, rc = 0, nr_chips=0;
+ uint64_t chipids[MAX_CHIPS];
+
+ printf("HBRT: OCC Start requested\n");
+
+ if (!(hservice_runtime && hservice_runtime->startOCCs)) {
+ prerror("HBRT: No hservice_runtime->startOCCs\n");
+ return -ENOENT;
+ }
+
+ for_each_chip(chip) {
+ chipids[nr_chips++] = chip->id;
+ }
+
+ printf("HBRT: [%16lx] Calling startOCC() for IDs: ", mftb());
+ for (i = 0; i < nr_chips; i++)
+ printf("%04llx ", chipids[i]);
+ printf("\n");
+
+ /* Let's start all OCCs */
+ rc = hservice_runtime->startOCCs(chipids, nr_chips);
+ hservice_mark();
+ printf("HBRT: [%16lx] -> rc = %d\n", mftb(), rc);
+ return rc;
+}
+
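+/*
+ * Carve out the per-chip HOMER images and a single OCC common area
+ * (allocated once on the first chip and shared by every chip) for the
+ * loadOCC() calls above to populate.
+ */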
+void host_services_occ_base_setup(void)
+{
+ struct proc_chip *chip;
+ uint64_t occ_common;
+
+ chip = next_chip(NULL); /* First chip */
+ occ_common = (uint64_t) local_alloc(chip->id, OCC_COMMON_SIZE, OCC_COMMON_SIZE);
+
+ for_each_chip(chip) {
+ chip->occ_common_base = occ_common;
+ chip->occ_common_size = OCC_COMMON_SIZE;
+
+ chip->homer_base = (uint64_t) local_alloc(chip->id, HOMER_IMAGE_SIZE,
+ HOMER_IMAGE_SIZE);
+ chip->homer_size = HOMER_IMAGE_SIZE;
+ memset((void *)chip->homer_base, 0, chip->homer_size);
+
+ printf("HBRT: Chip %d HOMER base %016llx : %08llx "
+ "OCC common base %016llx : %08llx\n",
+ chip->id, chip->homer_base, chip->homer_size,
+ chip->occ_common_base, chip->occ_common_size);
+ }
+}
+
+bool hservices_init(void)
+{
+ void *code = NULL;
+ struct runtime_interfaces *(*hbrt_init)(struct host_interfaces *);
+
+ struct function_descriptor {
+ void *addr;
+ void *toc;
+ } fdesc;
+
+ code = (void *)hservice_get_reserved_mem("ibm,hbrt-code-image");
+ if (!code) {
+ prerror("HBRT: No ibm,hbrt-code-image found.\n");
+ return false;
+ }
+
+ if (memcmp(code, "HBRTVERS", 8) != 0) {
+ prerror("HBRT: Bad eyecatcher for ibm,hbrt-code-image!\n");
+ return false;
+ }
+
+ printf("HBRT: Found HostBoot Runtime version %llu\n", ((u64 *)code)[1]);
+
+ /* We enter at 0x100 into the image. */
+ fdesc.addr = code + 0x100;
+ /* It doesn't care about TOC */
+ fdesc.toc = 0;
+
+ hbrt_init = (void *)&fdesc;
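+ /* Calling through &fdesc treats it as an ELFv1 function descriptor:
+ * the branch target comes from fdesc.addr and r2 from fdesc.toc,
+ * which is how we land 0x100 into the HBRT image.
+ */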
+
+ hservice_runtime = hbrt_init(&hinterface);
+ hservice_mark();
+ if (!hservice_runtime) {
+ prerror("HBRT: Host services init failed\n");
+ return false;
+ }
+
+ printf("HBRT: Interface version %llu\n",
+ hservice_runtime->interface_version);
+
+ return true;
+}
diff --git a/core/init.c b/core/init.c
new file mode 100644
index 0000000..3d72ce5
--- /dev/null
+++ b/core/init.c
@@ -0,0 +1,687 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <fsp.h>
+#include <fsp-sysparam.h>
+#include <psi.h>
+#include <memory.h>
+#include <chiptod.h>
+#include <nx.h>
+#include <cpu.h>
+#include <processor.h>
+#include <xscom.h>
+#include <device_tree.h>
+#include <opal.h>
+#include <opal-msg.h>
+#include <elf.h>
+#include <io.h>
+#include <cec.h>
+#include <device.h>
+#include <pci.h>
+#include <lpc.h>
+#include <chip.h>
+#include <interrupts.h>
+#include <mem_region.h>
+#include <trace.h>
+#include <console.h>
+#include <fsi-master.h>
+#include <centaur.h>
+#include <libfdt/libfdt.h>
+#include <hostservices.h>
+
+/*
+ * Boot semaphore, incremented by each CPU calling in
+ *
+ * Forced into data section as it will be used before BSS is initialized
+ */
+enum ipl_state ipl_state = ipl_initial;
+enum proc_gen proc_gen;
+
+static uint64_t kernel_entry;
+static bool kernel_32bit;
+static void *fdt;
+
+struct debug_descriptor debug_descriptor = {
+ .eye_catcher = "OPALdbug",
+ .version = DEBUG_DESC_VERSION,
+ .memcons_phys = (uint64_t)&memcons,
+ .trace_mask = 0, /* All traces disabled by default */
+};
+
+static bool try_load_elf64_le(struct elf_hdr *header)
+{
+ struct elf64_hdr *kh = (struct elf64_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf64_phdr *ph;
+ unsigned int i;
+
+ printf("INIT: 64-bit LE kernel discovered\n");
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+ * obey the load information in the headers. This is expected
+ * to work for the Linux Kernel because it's a fairly dumb ELF
+ * but it will not work for arbitrary ELF binaries.
+ */
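+ /* The entry point is given as a virtual address; once we find the
+ * segment containing it we translate it to a file offset
+ * (e_entry - p_vaddr + p_offset) and add load_base, where the image
+ * actually sits in memory.
+ */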
+ ph = (struct elf64_phdr *)(load_base + le64_to_cpu(kh->e_phoff));
+ for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
+ if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+ continue;
+ if (le64_to_cpu(ph->p_vaddr) > le64_to_cpu(kh->e_entry) ||
+ (le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_memsz)) <
+ le64_to_cpu(kh->e_entry))
+ continue;
+
+ /* Get our entry */
+ kernel_entry = le64_to_cpu(kh->e_entry) -
+ le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_offset);
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+ kernel_entry += load_base;
+ kernel_32bit = false;
+
+ printf("INIT: 64-bit kernel entry at 0x%llx\n", kernel_entry);
+
+ return true;
+}
+
+static bool try_load_elf64(struct elf_hdr *header)
+{
+ struct elf64_hdr *kh = (struct elf64_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf64_phdr *ph;
+ unsigned int i;
+
+ /* Check it's a ppc64 LE ELF */
+ if (kh->ei_ident == ELF_IDENT &&
+ kh->ei_data == ELF_DATA_LSB &&
+ kh->e_machine == le16_to_cpu(ELF_MACH_PPC64)) {
+ return try_load_elf64_le(header);
+ }
+
+ /* Check it's a ppc64 ELF */
+ if (kh->ei_ident != ELF_IDENT ||
+ kh->ei_data != ELF_DATA_MSB ||
+ kh->e_machine != ELF_MACH_PPC64) {
+ prerror("INIT: Kernel doesn't look like an ppc64 ELF\n");
+ return false;
+ }
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+ * obey the load information in the headers. This is expected
+ * to work for the Linux Kernel because it's a fairly dumb ELF
+ * but it will not work for arbitrary ELF binaries.
+ */
+ ph = (struct elf64_phdr *)(load_base + kh->e_phoff);
+ for (i = 0; i < kh->e_phnum; i++, ph++) {
+ if (ph->p_type != ELF_PTYPE_LOAD)
+ continue;
+ if (ph->p_vaddr > kh->e_entry ||
+ (ph->p_vaddr + ph->p_memsz) < kh->e_entry)
+ continue;
+
+ /* Get our entry */
+ kernel_entry = kh->e_entry - ph->p_vaddr + ph->p_offset;
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+ kernel_entry += load_base;
+ kernel_32bit = false;
+
+ printf("INIT: 64-bit kernel entry at 0x%llx\n", kernel_entry);
+
+ return true;
+}
+
+static bool try_load_elf32_le(struct elf_hdr *header)
+{
+ struct elf32_hdr *kh = (struct elf32_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf32_phdr *ph;
+ unsigned int i;
+
+ printf("INIT: 32-bit LE kernel discovered\n");
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+ * obey the load information in the headers. This is expected
+ * to work for the Linux Kernel because it's a fairly dumb ELF
+ * but it will not work for arbitrary ELF binaries.
+ */
+ ph = (struct elf32_phdr *)(load_base + le32_to_cpu(kh->e_phoff));
+ for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
+ if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+ continue;
+ if (le32_to_cpu(ph->p_vaddr) > le32_to_cpu(kh->e_entry) ||
+ (le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_memsz)) <
+ le32_to_cpu(kh->e_entry))
+ continue;
+
+ /* Get our entry */
+ kernel_entry = le32_to_cpu(kh->e_entry) -
+ le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_offset);
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+
+ kernel_entry += load_base;
+ kernel_32bit = true;
+
+ printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
+
+ return true;
+}
+
+static bool try_load_elf32(struct elf_hdr *header)
+{
+ struct elf32_hdr *kh = (struct elf32_hdr *)header;
+ uint64_t load_base = (uint64_t)kh;
+ struct elf32_phdr *ph;
+ unsigned int i;
+
+ /* Check it's a ppc32 LE ELF */
+ if (header->ei_ident == ELF_IDENT &&
+ header->ei_data == ELF_DATA_LSB &&
+ header->e_machine == le16_to_cpu(ELF_MACH_PPC32)) {
+ return try_load_elf32_le(header);
+ }
+
+ /* Check it's a ppc32 ELF */
+ if (header->ei_ident != ELF_IDENT ||
+ header->ei_data != ELF_DATA_MSB ||
+ header->e_machine != ELF_MACH_PPC32) {
+ prerror("INIT: Kernel doesn't look like an ppc32 ELF\n");
+ return false;
+ }
+
+ /* Look for a loadable program header that has our entry in it
+ *
+ * Note that we execute the kernel in-place, we don't actually
+ * obey the load information in the headers. This is expected
+ * to work for the Linux Kernel because it's a fairly dumb ELF
+ * but it will not work for arbitrary ELF binaries.
+ */
+ ph = (struct elf32_phdr *)(load_base + kh->e_phoff);
+ for (i = 0; i < kh->e_phnum; i++, ph++) {
+ if (ph->p_type != ELF_PTYPE_LOAD)
+ continue;
+ if (ph->p_vaddr > kh->e_entry ||
+ (ph->p_vaddr + ph->p_memsz) < kh->e_entry)
+ continue;
+
+ /* Get our entry */
+ kernel_entry = kh->e_entry - ph->p_vaddr + ph->p_offset;
+ break;
+ }
+
+ if (!kernel_entry) {
+ prerror("INIT: Failed to find kernel entry !\n");
+ return false;
+ }
+
+ kernel_entry += load_base;
+ kernel_32bit = true;
+
+ printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
+
+ return true;
+}
+
+/* LID numbers. For now we hijack some of pHyp's own until I figure
+ * out the whole business with the MasterLID
+ */
+#define KERNEL_LID_PHYP 0x80a00701
+#define KERNEL_LID_OPAL 0x80f00101
+
+extern char __builtin_kernel_start[];
+extern char __builtin_kernel_end[];
+extern uint64_t boot_offset;
+
+static bool load_kernel(void)
+{
+ struct elf_hdr *kh;
+ uint32_t lid;
+ size_t ksize;
+ const char *ltype;
+
+ ltype = dt_prop_get_def(dt_root, "lid-type", NULL);
+
+ /* No lid-type, assume stradale, currently pre-loaded at fixed
+ * address
+ */
+ if (!ltype) {
+ printf("No lid-type property, assuming FSP-less setup\n");
+ ksize = __builtin_kernel_end - __builtin_kernel_start;
+ if (ksize) {
+ /* Move the built-in kernel up */
+ uint64_t builtin_base =
+ ((uint64_t)__builtin_kernel_start) -
+ SKIBOOT_BASE + boot_offset;
+ printf("Using built-in kernel\n");
+ memmove(KERNEL_LOAD_BASE, (void*)builtin_base, ksize);
+ } else
+ printf("Assuming kernel at 0x%p\n", KERNEL_LOAD_BASE);
+ } else {
+ ksize = KERNEL_LOAD_SIZE;
+
+ /* First try to load an OPAL secondary LID always */
+ lid = fsp_adjust_lid_side(KERNEL_LID_OPAL);
+ printf("Trying to load OPAL secondary LID...\n");
+ if (fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid, 0,
+ KERNEL_LOAD_BASE, &ksize) != 0) {
+ if (!strcmp(ltype, "opal")) {
+ prerror("Failed to load in OPAL mode...\n");
+ return false;
+ }
+ printf("Trying to load as PHYP LID...\n");
+ lid = fsp_adjust_lid_side(KERNEL_LID_PHYP);
+ ksize = KERNEL_LOAD_SIZE;
+ if (fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid, 0,
+ KERNEL_LOAD_BASE, &ksize) != 0) {
+ prerror("Failed to load kernel\n");
+ return false;
+ }
+ }
+ }
+
+ printf("INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
+ ksize);
+
+ kh = (struct elf_hdr *)KERNEL_LOAD_BASE;
+ if (kh->ei_class == ELF_CLASS_64)
+ return try_load_elf64(kh);
+ else if (kh->ei_class == ELF_CLASS_32)
+ return try_load_elf32(kh);
+
+	printf("INIT: Neither ELF32 nor ELF64 ?\n");
+ return false;
+}
+
+void __noreturn load_and_boot_kernel(bool is_reboot)
+{
+ const struct dt_property *memprop;
+ uint64_t mem_top;
+
+ memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
+ if (memprop)
+ mem_top = (u64)dt_property_get_cell(memprop, 0) << 32
+ | dt_property_get_cell(memprop, 1);
+ else /* XXX HB hack, might want to calc it */
+ mem_top = 0x40000000;
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x000A);
+
+ /* Load kernel LID */
+ if (!load_kernel()) {
+ op_display(OP_FATAL, OP_MOD_INIT, 1);
+ abort();
+ }
+
+ if (!is_reboot) {
+ /* We wait for the nvram read to complete here so we can
+ * grab stuff from there such as the kernel arguments
+ */
+ fsp_nvram_wait_open();
+
+ /* Wait for FW VPD data read to complete */
+ fsp_code_update_wait_vpd(true);
+ }
+ fsp_console_select_stdout();
+
+ /*
+	 * OCC takes a few seconds to boot. Call this as late
+	 * as possible to avoid delay.
+ */
+ occ_pstates_init();
+
+ /* Set kernel command line argument if specified */
+#ifdef KERNEL_COMMAND_LINE
+ dt_add_property_string(dt_chosen, "bootargs", KERNEL_COMMAND_LINE);
+#endif
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x000B);
+
+ /* Create the device tree blob to boot OS. */
+ fdt = create_dtb(dt_root);
+ if (!fdt) {
+ op_display(OP_FATAL, OP_MOD_INIT, 2);
+ abort();
+ }
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x000C);
+
+ /* Start the kernel */
+ if (!is_reboot)
+ op_panel_disable_src_echo();
+
+ /* Clear SRCs on the op-panel when Linux starts */
+ op_panel_clear_src();
+
+ cpu_give_self_os();
+
+ printf("INIT: Starting kernel at 0x%llx, fdt at %p (size 0x%x)\n",
+ kernel_entry, fdt, fdt_totalsize(fdt));
+
+ fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+ if (kernel_32bit)
+ start_kernel32(kernel_entry, fdt, mem_top);
+ start_kernel(kernel_entry, fdt, mem_top);
+}
+
+static void dt_fixups(void)
+{
+ struct dt_node *n;
+ struct dt_node *primary_lpc = NULL;
+
+ /* lpc node missing #address/size cells. Also pick one as
+ * primary for now (TBD: How to convey that from HB)
+ */
+ dt_for_each_compatible(dt_root, n, "ibm,power8-lpc") {
+ if (!primary_lpc || dt_has_node_property(n, "primary", NULL))
+ primary_lpc = n;
+ if (dt_has_node_property(n, "#address-cells", NULL))
+ break;
+ dt_add_property_cells(n, "#address-cells", 2);
+ dt_add_property_cells(n, "#size-cells", 1);
+ dt_add_property_strings(n, "status", "ok");
+ }
+
+ /* Missing "primary" property in LPC bus */
+ if (primary_lpc && !dt_has_node_property(primary_lpc, "primary", NULL))
+ dt_add_property(primary_lpc, "primary", NULL, 0);
+
+ /* Missing "scom-controller" */
+ dt_for_each_compatible(dt_root, n, "ibm,xscom") {
+ if (!dt_has_node_property(n, "scom-controller", NULL))
+ dt_add_property(n, "scom-controller", NULL, 0);
+ }
+}
+
+static void add_arch_vector(void)
+{
+ /**
+ * vec5 = a PVR-list : Number-of-option-vectors :
+ * option-vectors[Number-of-option-vectors + 1]
+ */
+ uint8_t vec5[] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00};
+
+ if (dt_has_node_property(dt_chosen, "ibm,architecture-vec-5", NULL))
+ return;
+
+ dt_add_property(dt_chosen, "ibm,architecture-vec-5",
+ vec5, sizeof(vec5));
+}
+
+static void dt_init_misc(void)
+{
+ /* Check if there's a /chosen node, if not, add one */
+ dt_chosen = dt_find_by_path(dt_root, "/chosen");
+ if (!dt_chosen)
+ dt_chosen = dt_new(dt_root, "chosen");
+ assert(dt_chosen);
+
+ /* Add IBM architecture vectors if needed */
+ add_arch_vector();
+
+	/* Add the OPAL virtual ICS node */
+ add_ics_node();
+
+ /* Additional fixups. TODO: Move into platform */
+ dt_fixups();
+}
+
+/* Called from head.S, thus no prototype. */
+void main_cpu_entry(const void *fdt, u32 master_cpu);
+
+void __noreturn main_cpu_entry(const void *fdt, u32 master_cpu)
+{
+ /*
+	 * WARNING: At this point, the timebases have
+ * *not* been synchronized yet. Do not use any timebase
+ * related functions for timeouts etc... unless you can cope
+ * with the speed being some random core clock divider and
+ * the value jumping backward when the synchronization actually
+ * happens (in chiptod_init() below).
+ *
+ * Also the current cpu_thread() struct is not initialized
+	 * either, so we need to clear it out first thing (without
+	 * putting any other useful info in there just yet), otherwise
+	 * printf and locks are going to play funny games with "con_suspend"
+ */
+ pre_init_boot_cpu();
+
+ /*
+ * Before first printk, ensure console buffer is clear or
+ * reading tools might think it has wrapped
+ */
+ clear_console();
+
+ printf("SkiBoot %s starting...\n", gitid);
+
+ /* Initialize boot cpu's cpu_thread struct */
+ init_boot_cpu();
+
+ /* Now locks can be used */
+ init_locks();
+
+ /* Create the OPAL call table early on, entries can be overridden
+ * later on (FSP console code for example)
+ */
+ opal_table_init();
+
+ /*
+ * If we are coming in with a flat device-tree, we expand it
+ * now. Else look for HDAT and create a device-tree from them
+ *
+ * Hack alert: When entering via the OPAL entry point, fdt
+ * is set to -1, we record that and pass it to parse_hdat
+ */
+ if (fdt == (void *)-1ul)
+ parse_hdat(true, master_cpu);
+ else if (fdt == NULL)
+ parse_hdat(false, master_cpu);
+ else {
+ dt_expand(fdt);
+ }
+
+ /*
+ * From there, we follow a fairly strict initialization order.
+ *
+ * First we need to build up our chip data structures and initialize
+	 * XSCOM which will be needed for a number of subsequent things.
+ *
+ * We want XSCOM available as early as the platform probe in case the
+ * probe requires some HW accesses.
+ *
+ * We also initialize the FSI master at that point in case we need
+ * to access chips via that path early on.
+ */
+ init_chips();
+ xscom_init();
+ mfsi_init();
+
+ /*
+ * Put various bits & pieces in device-tree that might not
+ * already be there such as the /chosen node if not there yet,
+ * the ICS node, etc... This can potentially use XSCOM
+ */
+ dt_init_misc();
+
+ /*
+ * Initialize LPC (P8 only) so we can get to UART, BMC and
+ * other system controller. This is done before probe_platform
+ * so that the platform probing code can access an external
+ * BMC if needed.
+ */
+ lpc_init();
+
+ /*
+ * Now, we init our memory map from the device-tree, and immediately
+ * reserve areas which we know might contain data coming from
+ * HostBoot. We need to do these things before we start doing
+ * allocations outside of our heap, such as chip local allocs,
+ * otherwise we might clobber those data.
+ */
+ mem_region_init();
+
+ /* Reserve HOMER and OCC area */
+ homer_init();
+
+ /* Initialize host services. */
+ hservices_init();
+
+ /*
+ * We probe the platform now. This means the platform probe gets
+ * the opportunity to reserve additional areas of memory if needed.
+ *
+ * Note: Timebases still not synchronized.
+ */
+ probe_platform();
+
+ /* Initialize the rest of the cpu thread structs */
+ init_all_cpus();
+
+ /* Add the /opal node to the device-tree */
+ add_opal_node();
+
+	/* Allocate our split trace buffers now. Depends on add_opal_node() */
+ init_trace_buffers();
+
+ /* Get the ICPs and make sure they are in a sane state */
+ init_interrupts();
+
+ /* Grab centaurs from device-tree if present (only on FSP-less) */
+ centaur_init();
+
+ /* Initialize PSI (depends on probe_platform being called) */
+ psi_init();
+
+ /* Call in secondary CPUs */
+ cpu_bringup();
+
+ /*
+	 * Synchronize timebases. This resets all the TB values to a small
+	 * value (so they appear to go backward at this point), and synchronizes
+	 * all core timebases to the global ChipTOD network
+ */
+ chiptod_init(master_cpu);
+
+ /*
+ * We have initialized the basic HW, we can now call into the
+ * platform to perform subsequent inits, such as establishing
+ * communication with the FSP.
+ */
+ if (platform.init)
+ platform.init();
+
+ /* Init SLW related stuff, including fastsleep */
+ slw_init();
+
+ op_display(OP_LOG, OP_MOD_INIT, 0x0002);
+
+ /* Read in NVRAM and set it up */
+ nvram_init();
+
+ /* NX init */
+ nx_init();
+
+ /* Initialize the opal messaging */
+ opal_init_msg();
+
+ /* Probe IO hubs */
+ probe_p5ioc2();
+ probe_p7ioc();
+
+ /* Probe PHB3 on P8 */
+ probe_phb3();
+
+ /* Initialize PCI */
+ pci_init_slots();
+
+ /*
+ * These last few things must be done as late as possible
+	 * because they rely on various other things having been set up,
+ * for example, add_opal_interrupts() will add all the interrupt
+ * sources that are going to the firmware. We can't add a new one
+	 * after that call. Similarly, the mem_region calls will construct
+ * the reserve maps in the DT so we shouldn't affect the memory
+ * regions after that
+ */
+
+ /* Add the list of interrupts going to OPAL */
+ add_opal_interrupts();
+
+ /* Now release parts of memory nodes we haven't used ourselves... */
+ mem_region_release_unused();
+
+ /* ... and add remaining reservations to the DT */
+ mem_region_add_dt_reserved();
+
+ load_and_boot_kernel(false);
+}
+
+void __noreturn __secondary_cpu_entry(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ /* Secondary CPU called in */
+ cpu_callin(cpu);
+
+ /* Wait for work to do */
+ while(true) {
+ int i;
+
+ /* Process pending jobs on this processor */
+ cpu_process_jobs();
+
+ /* Relax a bit to give the simulator some breathing space */
+ i = 1000;
+ while (--i)
+ smt_very_low();
+ smt_low();
+ }
+}
+
+/* Called from head.S, thus no prototype. */
+void secondary_cpu_entry(void);
+
+void __noreturn secondary_cpu_entry(void)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ printf("INIT: CPU PIR 0x%04x called in\n", cpu->pir);
+
+ __secondary_cpu_entry();
+}
+
diff --git a/core/interrupts.c b/core/interrupts.c
new file mode 100644
index 0000000..cabebc2
--- /dev/null
+++ b/core/interrupts.c
@@ -0,0 +1,332 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <fsp.h>
+#include <interrupts.h>
+#include <opal.h>
+#include <io.h>
+#include <cec.h>
+#include <device.h>
+#include <ccan/str/str.h>
+
+/* ICP registers */
+#define ICP_XIRR 0x4 /* 32-bit access */
+#define ICP_CPPR 0x4 /* 8-bit access */
+#define ICP_MFRR 0xc /* 8-bit access */
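+
+/* Note on the XICS presentation controller (ICP) registers used below:
+ * a 32-bit XIRR value packs the CPPR in its top byte and the interrupt
+ * source (XISR) in the low 24 bits, so a single store both sets the
+ * priority mask and EOIs that source. MFRR holds the IPI priority:
+ * writing 0xff (least favored) clears a pending IPI, writing 0 sends one.
+ */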
+
+struct irq_source {
+ uint32_t start;
+ uint32_t end;
+ const struct irq_source_ops *ops;
+ void *data;
+ struct list_node link;
+};
+
+static LIST_HEAD(irq_sources);
+static struct lock irq_lock = LOCK_UNLOCKED;
+
+void register_irq_source(const struct irq_source_ops *ops, void *data,
+ uint32_t start, uint32_t count)
+{
+ struct irq_source *is, *is1;
+
+ is = zalloc(sizeof(struct irq_source));
+ assert(is);
+ is->start = start;
+ is->end = start + count;
+ is->ops = ops;
+ is->data = data;
+
+ printf("IRQ: Registering %04x..%04x ops @%p (data %p) %s\n",
+ start, start + count - 1, ops, data,
+ ops->interrupt ? "[Internal]" : "[OS]");
+
+ lock(&irq_lock);
+ list_for_each(&irq_sources, is1, link) {
+ if (is->end > is1->start && is->start < is1->end) {
+ prerror("register IRQ source overlap !\n");
+ prerror(" new: %x..%x old: %x..%x\n",
+ is->start, is->end - 1,
+ is1->start, is1->end - 1);
+ assert(0);
+ }
+ }
+ list_add_tail(&irq_sources, &is->link);
+ unlock(&irq_lock);
+}
+
+void unregister_irq_source(uint32_t start, uint32_t count)
+{
+ struct irq_source *is;
+
+ lock(&irq_lock);
+ list_for_each(&irq_sources, is, link) {
+ if (start >= is->start && start < is->end) {
+ if (start != is->start ||
+ count != (is->end - is->start)) {
+ prerror("unregister IRQ source mismatch !\n");
+ prerror("start:%x, count: %x match: %x..%x\n",
+ start, count, is->start, is->end);
+ assert(0);
+ }
+ list_del(&is->link);
+ unlock(&irq_lock);
+ /* XXX Add synchronize / RCU */
+ free(is);
+ return;
+ }
+ }
+ unlock(&irq_lock);
+ prerror("unregister IRQ source not found !\n");
+ prerror("start:%x, count: %x\n", start, count);
+ assert(0);
+}
+
+/*
+ * This takes a 6-bit chip id and returns a 20 bit value representing
+ * the PSI interrupt. This includes all the fields above, i.e., it is a
+ * global interrupt number.
+ *
+ * For P8, this returns the base of the 8-interrupts block for PSI
+ */
+uint32_t get_psi_interrupt(uint32_t chip_id)
+{
+ uint32_t irq;
+
+ switch(proc_gen) {
+ case proc_gen_p7:
+ /* Get the chip ID into position, it already has
+ * the T bit so all we need is room for the GX
+ * bit, 9 bit BUID and 4 bit level
+ */
+ irq = chip_id << (1 + 9 + 4);
+
+ /* Add in the BUID */
+ irq |= P7_PSI_IRQ_BUID << 4;
+ break;
+ case proc_gen_p8:
+ irq = P8_CHIP_IRQ_BLOCK_BASE(chip_id, P8_IRQ_BLOCK_MISC);
+ irq += P8_IRQ_MISC_PSI_BASE;
+ break;
+ default:
+ assert(false);
+ };
+
+ return irq;
+}
+
+
+struct dt_node *add_ics_node(void)
+{
+ struct dt_node *ics = dt_new_addr(dt_root, "interrupt-controller", 0);
+ if (!ics)
+ return NULL;
+
+ dt_add_property_cells(ics, "reg", 0, 0, 0, 0);
+ dt_add_property_strings(ics, "compatible", "IBM,ppc-xics",
+ "IBM,opal-xics");
+ dt_add_property_cells(ics, "#address-cells", 0);
+ dt_add_property_cells(ics, "#interrupt-cells", 1);
+ dt_add_property_string(ics, "device_type",
+ "PowerPC-Interrupt-Source-Controller");
+ dt_add_property(ics, "interrupt-controller", NULL, 0);
+
+ return ics;
+}
+
+uint32_t get_ics_phandle(void)
+{
+ struct dt_node *i;
+
+ for (i = dt_first(dt_root); i; i = dt_next(dt_root, i)) {
+ if (streq(i->name, "interrupt-controller@0")) {
+ return i->phandle;
+ }
+ }
+ abort();
+}
+
+void add_opal_interrupts(void)
+{
+ struct irq_source *is;
+ unsigned int i, count = 0;
+ uint32_t *irqs = NULL, isn;
+
+ lock(&irq_lock);
+ list_for_each(&irq_sources, is, link) {
+ /*
+ * Add a source to opal-interrupts if it has an
+ * ->interrupt callback
+ */
+ if (!is->ops->interrupt)
+ continue;
+ for (isn = is->start; isn < is->end; isn++) {
+ i = count++;
+ irqs = realloc(irqs, 4 * count);
+ irqs[i] = isn;
+ }
+ }
+ unlock(&irq_lock);
+
+ /* The opal-interrupts property has one cell per interrupt,
+ * it is not a standard interrupt property
+ */
+ if (irqs)
+ dt_add_property(opal_node, "opal-interrupts", irqs, count * 4);
+}
+
+/*
+ * This is called at init time (and on fast reboot) to sanitize the
+ * ICP. We set our priority to 0 to mask all interrupts and make sure
+ * no IPI is on the way.
+ */
+void reset_cpu_icp(void)
+{
+ void *icp = this_cpu()->icp_regs;
+
+ assert(icp);
+
+ /* Clear pending IPIs */
+ out_8(icp + ICP_MFRR, 0xff);
+
+ /* Set priority to max, ignore all incoming interrupts, EOI IPIs */
+ out_be32(icp + ICP_XIRR, 2);
+}
+
+/* Used by the PSI code to send an EOI during reset. This will also
+ * set the CPPR to 0 which should already be the case anyway
+ */
+void icp_send_eoi(uint32_t interrupt)
+{
+ void *icp = this_cpu()->icp_regs;
+
+ assert(icp);
+
+	/* CPPR byte of 0 masks all incoming interrupts; the low 24 bits EOI
+	 * the given source
+	 */
+ out_be32(icp + ICP_XIRR, interrupt & 0xffffff);
+}
+
+/* This is called before winkle, we clear pending IPIs and set our priority
+ * to 1 to mask all but the IPI
+ */
+void icp_prep_for_rvwinkle(void)
+{
+ void *icp = this_cpu()->icp_regs;
+
+ assert(icp);
+
+ /* Clear pending IPIs */
+ out_8(icp + ICP_MFRR, 0xff);
+
+ /* Set priority to 1, ignore all incoming interrupts, EOI IPIs */
+ out_be32(icp + ICP_XIRR, 0x01000002);
+}
+
+/* This is called to wakeup somebody from winkle */
+void icp_kick_cpu(struct cpu_thread *cpu)
+{
+ void *icp = cpu->icp_regs;
+
+ assert(icp);
+
+ /* Send high priority IPI */
+ out_8(icp + ICP_MFRR, 0);
+}
+
+static struct irq_source *irq_find_source(uint32_t isn)
+{
+ struct irq_source *is;
+
+ lock(&irq_lock);
+ list_for_each(&irq_sources, is, link) {
+ if (isn >= is->start && isn < is->end) {
+ unlock(&irq_lock);
+ return is;
+ }
+ }
+ unlock(&irq_lock);
+
+ return NULL;
+}
+
+static int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority)
+{
+ struct irq_source *is = irq_find_source(isn);
+
+ if (!is || !is->ops->set_xive)
+ return OPAL_PARAMETER;
+
+ return is->ops->set_xive(is->data, isn, server, priority);
+}
+opal_call(OPAL_SET_XIVE, opal_set_xive, 3);
+
+static int64_t opal_get_xive(uint32_t isn, uint16_t *server, uint8_t *priority)
+{
+ struct irq_source *is = irq_find_source(isn);
+
+ if (!is || !is->ops->get_xive)
+ return OPAL_PARAMETER;
+
+ return is->ops->get_xive(is->data, isn, server, priority);
+}
+opal_call(OPAL_GET_XIVE, opal_get_xive, 3);
+
+static int64_t opal_handle_interrupt(uint32_t isn, uint64_t *outstanding_event_mask)
+{
+ struct irq_source *is = irq_find_source(isn);
+ int64_t rc = OPAL_SUCCESS;
+
+ if (!is || !is->ops->interrupt) {
+ rc = OPAL_PARAMETER;
+ goto bail;
+ }
+
+ is->ops->interrupt(is->data, isn);
+
+ /* Update output events */
+ bail:
+ if (outstanding_event_mask)
+ *outstanding_event_mask = opal_pending_events;
+
+ return rc;
+}
+opal_call(OPAL_HANDLE_INTERRUPT, opal_handle_interrupt, 2);
+
+void init_interrupts(void)
+{
+ struct dt_node *icp;
+ const struct dt_property *sranges;
+ struct cpu_thread *cpu;
+ u32 base, count, i;
+ u64 addr, size;
+
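+	/* Each ibm,ppc-xicp node carries "ibm,interrupt-server-ranges"
+	 * (first server number, count) and one "reg" entry per server;
+	 * map each ICP MMIO block to the matching cpu_thread.
+	 */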
+ dt_for_each_compatible(dt_root, icp, "ibm,ppc-xicp") {
+ sranges = dt_require_property(icp,
+ "ibm,interrupt-server-ranges",
+ -1);
+ base = dt_get_number(sranges->prop, 1);
+ count = dt_get_number(sranges->prop + 4, 1);
+ for (i = 0; i < count; i++) {
+ addr = dt_get_address(icp, i, &size);
+ cpu = find_cpu_by_server(base + i);
+ if (cpu)
+ cpu->icp_regs = (void *)addr;
+ }
+ }
+}
+
diff --git a/core/lock.c b/core/lock.c
new file mode 100644
index 0000000..fc4bf6b
--- /dev/null
+++ b/core/lock.c
@@ -0,0 +1,125 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <lock.h>
+#include <assert.h>
+#include <processor.h>
+#include <cpu.h>
+#include <console.h>
+
+/* Set to bust locks. Note, this is initialized to true because our
+ * lock debugging code is not going to work until we have the per
+ * CPU data initialized
+ */
+bool bust_locks = true;
+
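+/* The lock word encodes the holder's PIR in its upper 32 bits and the
+ * "held" flag in bit 0 (set by __try_lock()); the debug checks and
+ * lock_recursive() below rely on that layout.
+ */
+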
+#ifdef DEBUG_LOCKS
+
+static void lock_error(struct lock *l, const char *reason, uint16_t err)
+{
+ op_display(OP_FATAL, OP_MOD_LOCK, err);
+
+ fprintf(stderr, "LOCK ERROR: %s @%p (state: 0x%016lx)\n",
+ reason, l, l->lock_val);
+ abort();
+}
+
+static void lock_check(struct lock *l)
+{
+ if ((l->lock_val & 1) && (l->lock_val >> 32) == this_cpu()->pir)
+ lock_error(l, "Invalid recursive lock", 0);
+}
+
+static void unlock_check(struct lock *l)
+{
+ if (!(l->lock_val & 1))
+ lock_error(l, "Unlocking unlocked lock", 1);
+
+ if ((l->lock_val >> 32) != this_cpu()->pir)
+ lock_error(l, "Unlocked non-owned lock", 2);
+
+ if (l->in_con_path && this_cpu()->con_suspend == 0)
+ lock_error(l, "Unlock con lock with console not suspended", 3);
+}
+
+#else
+static inline void lock_check(struct lock *l) { };
+static inline void unlock_check(struct lock *l) { };
+#endif /* DEBUG_LOCKS */
+
+
+bool try_lock(struct lock *l)
+{
+ if (__try_lock(l)) {
+ if (l->in_con_path)
+ this_cpu()->con_suspend++;
+ return true;
+ }
+ return false;
+}
+
+void lock(struct lock *l)
+{
+ if (bust_locks)
+ return;
+
+ lock_check(l);
+ for (;;) {
+ if (try_lock(l))
+ break;
+ smt_low();
+ }
+ smt_medium();
+}
+
+void unlock(struct lock *l)
+{
+ struct cpu_thread *cpu = this_cpu();
+
+ if (bust_locks)
+ return;
+
+ unlock_check(l);
+
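+	/* lwsync acts as a release barrier: everything done while holding
+	 * the lock is made visible before the lock word is cleared.
+	 */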
+ lwsync();
+ l->lock_val = 0;
+
+ if (l->in_con_path) {
+ cpu->con_suspend--;
+ if (cpu->con_suspend == 0 && cpu->con_need_flush)
+ flush_console();
+ }
+}
+
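+/* Take the lock unless this CPU already holds it. Returns true only if the
+ * lock was actually taken here, i.e. only if the caller must unlock it.
+ */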
+bool lock_recursive(struct lock *l)
+{
+ if (bust_locks)
+ return false;
+
+ if ((l->lock_val & 1) &&
+ (l->lock_val >> 32) == this_cpu()->pir)
+ return false;
+
+ lock(l);
+ return true;
+}
+
+
+void init_locks(void)
+{
+ bust_locks = false;
+}
diff --git a/core/malloc.c b/core/malloc.c
new file mode 100644
index 0000000..692a501
--- /dev/null
+++ b/core/malloc.c
@@ -0,0 +1,84 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Wrappers for malloc, et. al. */
+#include <mem_region.h>
+#include <lock.h>
+#include <string.h>
+#include <mem_region-malloc.h>
+
+#define DEFAULT_ALIGN __alignof__(long)
+
+void *__memalign(size_t blocksize, size_t bytes, const char *location)
+{
+ void *p;
+
+ lock(&mem_region_lock);
+ p = mem_alloc(&skiboot_heap, bytes, blocksize, location);
+ unlock(&mem_region_lock);
+
+ return p;
+}
+
+void *__malloc(size_t bytes, const char *location)
+{
+ return __memalign(DEFAULT_ALIGN, bytes, location);
+}
+
+void __free(void *p, const char *location)
+{
+ lock(&mem_region_lock);
+ mem_free(&skiboot_heap, p, location);
+ unlock(&mem_region_lock);
+}
+
+void *__realloc(void *ptr, size_t size, const char *location)
+{
+ void *newptr;
+
+ /* Two classic malloc corner cases. */
+ if (!size) {
+ __free(ptr, location);
+ return NULL;
+ }
+ if (!ptr)
+ return __malloc(size, location);
+
+ lock(&mem_region_lock);
+ if (mem_resize(&skiboot_heap, ptr, size, location)) {
+ newptr = ptr;
+ } else {
+ newptr = mem_alloc(&skiboot_heap, size, DEFAULT_ALIGN,
+ location);
+ if (newptr) {
+ size_t copy = mem_size(&skiboot_heap, ptr);
+ if (copy > size)
+ copy = size;
+ memcpy(newptr, ptr, copy);
+ mem_free(&skiboot_heap, ptr, location);
+ }
+ }
+ unlock(&mem_region_lock);
+ return newptr;
+}
+
+void *__zalloc(size_t bytes, const char *location)
+{
+ void *p = __malloc(bytes, location);
+
+ if (p)
+ memset(p, 0, bytes);
+ return p;
+}
diff --git a/core/mem_region.c b/core/mem_region.c
new file mode 100644
index 0000000..8904a18
--- /dev/null
+++ b/core/mem_region.c
@@ -0,0 +1,956 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <mem-map.h>
+#include <libfdt_env.h>
+#include <lock.h>
+#include <device.h>
+#include <cpu.h>
+#include <affinity.h>
+#include <types.h>
+#include <mem_region.h>
+#include <mem_region-malloc.h>
+
+/* Memory poisoning on free (if POISON_MEM_REGION set to 1) */
+#define POISON_MEM_REGION 0
+#define POISON_MEM_REGION_WITH 0x99
+#define POISON_MEM_REGION_LIMIT 1*1024*1024*1024
+
+struct lock mem_region_lock = LOCK_UNLOCKED;
+
+static struct list_head regions = LIST_HEAD_INIT(regions);
+
+static struct mem_region skiboot_os_reserve = {
+ .name = "ibm,os-reserve",
+ .start = 0,
+ .len = SKIBOOT_BASE,
+ .type = REGION_OS,
+};
+
+struct mem_region skiboot_heap = {
+ .name = "ibm,firmware-heap",
+ .start = HEAP_BASE,
+ .len = HEAP_SIZE,
+ .type = REGION_SKIBOOT_HEAP,
+};
+
+static struct mem_region skiboot_code_and_text = {
+ .name = "ibm,firmware-code",
+ .start = SKIBOOT_BASE,
+ .len = HEAP_BASE - SKIBOOT_BASE,
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_after_heap = {
+ .name = "ibm,firmware-data",
+ .start = HEAP_BASE + HEAP_SIZE,
+ .len = SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE),
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+static struct mem_region skiboot_cpu_stacks = {
+ .name = "ibm,firmware-stacks",
+ .start = CPU_STACKS_BASE,
+ .len = 0, /* TBA */
+ .type = REGION_SKIBOOT_FIRMWARE,
+};
+
+struct alloc_hdr {
+ bool free : 1;
+ bool prev_free : 1;
+ unsigned long num_longs : BITS_PER_LONG-2; /* Including header. */
+ const char *location;
+};
+
+struct free_hdr {
+ struct alloc_hdr hdr;
+ struct list_node list;
+ /* ... unsigned long tailer; */
+};
+
+#define ALLOC_HDR_LONGS (sizeof(struct alloc_hdr) / sizeof(long))
+#define ALLOC_MIN_LONGS (sizeof(struct free_hdr) / sizeof(long) + 1)
+
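+/* The heap is a boundary-tag allocator: every block starts with an
+ * alloc_hdr sized in longs, and free blocks also keep their size in their
+ * last long (the "tailer") so make_free() can walk backwards and coalesce
+ * with a free predecessor via the prev_free flag.
+ */
+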
+/* Avoid ugly casts. */
+static void *region_start(const struct mem_region *region)
+{
+ return (void *)(unsigned long)region->start;
+}
+
+/* Each free block has a tailer, so we can walk backwards. */
+static unsigned long *tailer(struct free_hdr *f)
+{
+ return (unsigned long *)f + f->hdr.num_longs - 1;
+}
+
+/* This walks forward to the next hdr (or NULL if at the end). */
+static struct alloc_hdr *next_hdr(const struct mem_region *region,
+ const struct alloc_hdr *hdr)
+{
+ void *next;
+
+ next = ((unsigned long *)hdr + hdr->num_longs);
+ if (next >= region_start(region) + region->len)
+ next = NULL;
+ return next;
+}
+
+/* Creates free block covering entire region. */
+static void init_allocatable_region(struct mem_region *region)
+{
+ struct free_hdr *f = region_start(region);
+ assert(region->type == REGION_SKIBOOT_HEAP);
+ f->hdr.num_longs = region->len / sizeof(long);
+ f->hdr.free = true;
+ f->hdr.prev_free = false;
+ *tailer(f) = f->hdr.num_longs;
+ list_head_init(&region->free_list);
+ list_add(&region->free_list, &f->list);
+}
+
+static void make_free(struct mem_region *region, struct free_hdr *f,
+ const char *location)
+{
+ struct alloc_hdr *next;
+#if POISON_MEM_REGION == 1
+	size_t poison_size = (void *)tailer(f) - (void *)(f + 1);
+
+ /* We only poison up to a limit, as otherwise boot is kinda slow */
+ if (poison_size > POISON_MEM_REGION_LIMIT) {
+ poison_size = POISON_MEM_REGION_LIMIT;
+ }
+
+ memset(f+1, POISON_MEM_REGION_WITH, poison_size);
+#endif
+ if (f->hdr.prev_free) {
+ struct free_hdr *prev;
+ unsigned long *prev_tailer = (unsigned long *)f - 1;
+
+ assert(*prev_tailer);
+ prev = (void *)((unsigned long *)f - *prev_tailer);
+ assert(prev->hdr.free);
+ assert(!prev->hdr.prev_free);
+
+ /* Expand to cover the one we just freed. */
+ prev->hdr.num_longs += f->hdr.num_longs;
+ f = prev;
+ } else {
+ f->hdr.free = true;
+ f->hdr.location = location;
+ list_add(&region->free_list, &f->list);
+ }
+
+ /* Fix up tailer. */
+ *tailer(f) = f->hdr.num_longs;
+
+ /* If next is free, coalesce it */
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ next->prev_free = true;
+ if (next->free) {
+ struct free_hdr *next_free = (void *)next;
+ list_del_from(&region->free_list, &next_free->list);
+ /* Maximum of one level of recursion */
+ make_free(region, next_free, location);
+ }
+ }
+}
+
+/* Can we fit this many longs with this alignment in this free block? */
+static bool fits(struct free_hdr *f, size_t longs, size_t align, size_t *offset)
+{
+ *offset = 0;
+
+ while (f->hdr.num_longs >= *offset + longs) {
+ size_t addr;
+
+ addr = (unsigned long)f
+ + (*offset + ALLOC_HDR_LONGS) * sizeof(long);
+ if ((addr & (align - 1)) == 0)
+ return true;
+
+ /* Don't make tiny chunks! */
+ if (*offset == 0)
+ *offset = ALLOC_MIN_LONGS;
+ else
+ (*offset)++;
+ }
+ return false;
+}
+
+static void discard_excess(struct mem_region *region,
+ struct alloc_hdr *hdr, size_t alloc_longs,
+ const char *location)
+{
+ /* Do we have excess? */
+ if (hdr->num_longs > alloc_longs + ALLOC_MIN_LONGS) {
+ struct free_hdr *post;
+
+ /* Set up post block. */
+ post = (void *)hdr + alloc_longs * sizeof(long);
+ post->hdr.num_longs = hdr->num_longs - alloc_longs;
+ post->hdr.prev_free = false;
+
+ /* Trim our block. */
+ hdr->num_longs = alloc_longs;
+
+ /* This coalesces as required. */
+ make_free(region, post, location);
+ }
+}
+
+static const char *hdr_location(const struct alloc_hdr *hdr)
+{
+ /* Corrupt: step carefully! */
+ if (is_rodata(hdr->location))
+ return hdr->location;
+ return "*CORRUPT*";
+}
+
+static void bad_header(const struct mem_region *region,
+ const struct alloc_hdr *hdr,
+ const char *during,
+ const char *location)
+{
+ /* Corrupt: step carefully! */
+ if (is_rodata(hdr->location))
+ prerror("%p (in %s) %s at %s, previously %s\n",
+ hdr-1, region->name, during, location, hdr->location);
+ else
+ prerror("%p (in %s) %s at %s, previously %p\n",
+ hdr-1, region->name, during, location, hdr->location);
+ abort();
+}
+
+static bool region_is_reserved(struct mem_region *region)
+{
+ return region->type != REGION_OS;
+}
+
+static void mem_dump_allocs(void)
+{
+ struct mem_region *region;
+ struct alloc_hdr *hdr;
+
+	/* Walk each heap region and print its live allocations */
+ printf("Memory regions:\n");
+ list_for_each(&regions, region, list) {
+ if (region->type != REGION_SKIBOOT_HEAP)
+ continue;
+ printf(" 0x%012llx..%012llx : %s\n",
+ (long long)region->start,
+ (long long)(region->start + region->len - 1),
+ region->name);
+ if (region->free_list.n.next == NULL) {
+ printf(" no allocs\n");
+ continue;
+ }
+ for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+ if (hdr->free)
+ continue;
+ printf(" 0x%.8lx %s\n", hdr->num_longs * sizeof(long),
+ hdr_location(hdr));
+ }
+ }
+}
+
+static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,
+ const char *location)
+{
+ size_t alloc_longs, offset;
+ struct free_hdr *f;
+ struct alloc_hdr *next;
+
+ /* Align must be power of 2. */
+ assert(!((align - 1) & align));
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ /* Unallocatable region? */
+ if (region->type != REGION_SKIBOOT_HEAP)
+ return NULL;
+
+ /* First allocation? */
+ if (region->free_list.n.next == NULL)
+ init_allocatable_region(region);
+
+ /* Don't do screwy sizes. */
+ if (size > region->len)
+ return NULL;
+
+ /* Don't do tiny alignments, we deal in long increments. */
+ if (align < sizeof(long))
+ align = sizeof(long);
+
+ /* Convert size to number of longs, too. */
+ alloc_longs = (size + sizeof(long)-1) / sizeof(long) + ALLOC_HDR_LONGS;
+
+ /* Can't be too small for when we free it, either. */
+ if (alloc_longs < ALLOC_MIN_LONGS)
+ alloc_longs = ALLOC_MIN_LONGS;
+
+ /* Walk free list. */
+ list_for_each(&region->free_list, f, list) {
+ /* We may have to skip some to meet alignment. */
+ if (fits(f, alloc_longs, align, &offset))
+ goto found;
+ }
+
+ return NULL;
+
+found:
+ assert(f->hdr.free);
+ assert(!f->hdr.prev_free);
+
+ /* This block is no longer free. */
+ list_del_from(&region->free_list, &f->list);
+ f->hdr.free = false;
+ f->hdr.location = location;
+
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ assert(next->prev_free);
+ next->prev_free = false;
+ }
+
+ if (offset != 0) {
+ struct free_hdr *pre = f;
+
+ f = (void *)f + offset * sizeof(long);
+ assert(f >= pre + 1);
+
+ /* Set up new header. */
+ f->hdr.num_longs = pre->hdr.num_longs - offset;
+ /* f->hdr.prev_free will be set by make_free below. */
+ f->hdr.free = false;
+ f->hdr.location = location;
+
+ /* Fix up old header. */
+ pre->hdr.num_longs = offset;
+ pre->hdr.prev_free = false;
+
+ /* This coalesces as required. */
+ make_free(region, pre, location);
+ }
+
+ /* We might be too long; put the rest back. */
+ discard_excess(region, &f->hdr, alloc_longs, location);
+
+ /* Clear tailer for debugging */
+ *tailer(f) = 0;
+
+ /* Their pointer is immediately after header. */
+ return &f->hdr + 1;
+}
+
+void *mem_alloc(struct mem_region *region, size_t size, size_t align,
+ const char *location)
+{
+ void *r = __mem_alloc(region, size, align, location);
+
+ if (r)
+ return r;
+
+ prerror("mem_alloc(0x%lx, 0x%lx, \"%s\") failed !\n",
+ size, align, location);
+ mem_dump_allocs();
+ return NULL;
+}
+
+void mem_free(struct mem_region *region, void *mem, const char *location)
+{
+ struct alloc_hdr *hdr;
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ /* Freeing NULL is always a noop. */
+ if (!mem)
+ return;
+
+ /* Your memory is in the region, right? */
+ assert(mem >= region_start(region) + sizeof(*hdr));
+ assert(mem < region_start(region) + region->len);
+
+ /* Grab header. */
+ hdr = mem - sizeof(*hdr);
+
+ if (hdr->free)
+ bad_header(region, hdr, "re-freed", location);
+
+ make_free(region, (struct free_hdr *)hdr, location);
+}
+
+size_t mem_size(const struct mem_region *region __unused, const void *ptr)
+{
+ const struct alloc_hdr *hdr = ptr - sizeof(*hdr);
+ return hdr->num_longs * sizeof(long);
+}
+
+bool mem_resize(struct mem_region *region, void *mem, size_t len,
+ const char *location)
+{
+ struct alloc_hdr *hdr, *next;
+ struct free_hdr *f;
+
+ /* This should be a constant. */
+ assert(is_rodata(location));
+
+ /* Get header. */
+ hdr = mem - sizeof(*hdr);
+ if (hdr->free)
+ bad_header(region, hdr, "resize", location);
+
+ /* Round up size to multiple of longs. */
+ len = (sizeof(*hdr) + len + sizeof(long) - 1) / sizeof(long);
+
+ /* Can't be too small for when we free it, either. */
+ if (len < ALLOC_MIN_LONGS)
+ len = ALLOC_MIN_LONGS;
+
+ /* Shrinking is simple. */
+ if (len <= hdr->num_longs) {
+ hdr->location = location;
+ discard_excess(region, hdr, len, location);
+ return true;
+ }
+
+ /* Check if we can expand. */
+ next = next_hdr(region, hdr);
+ if (!next || !next->free || hdr->num_longs + next->num_longs < len)
+ return false;
+
+ /* OK, it's free and big enough, absorb it. */
+ f = (struct free_hdr *)next;
+ list_del_from(&region->free_list, &f->list);
+ hdr->num_longs += next->num_longs;
+ hdr->location = location;
+
+ /* Update next prev_free */
+ next = next_hdr(region, &f->hdr);
+ if (next) {
+ assert(next->prev_free);
+ next->prev_free = false;
+ }
+
+ /* Clear tailer for debugging */
+ *tailer(f) = 0;
+
+ /* Now we might have *too* much. */
+ discard_excess(region, hdr, len, location);
+ return true;
+}
+
+bool mem_check(const struct mem_region *region)
+{
+ size_t frees = 0;
+ struct alloc_hdr *hdr, *prev_free = NULL;
+ struct free_hdr *f;
+
+ /* Check it's sanely aligned. */
+ if (region->start % sizeof(struct alloc_hdr)) {
+ prerror("Region '%s' not sanely aligned (%llx)\n",
+ region->name, (unsigned long long)region->start);
+ return false;
+ }
+ if ((long)region->len % sizeof(struct alloc_hdr)) {
+ prerror("Region '%s' not sane length (%llu)\n",
+ region->name, (unsigned long long)region->len);
+ return false;
+ }
+
+ /* Not ours to play with, or empty? Don't do anything. */
+ if (region->type != REGION_SKIBOOT_HEAP ||
+ region->free_list.n.next == NULL)
+ return true;
+
+ /* Walk linearly. */
+ for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+ if (hdr->num_longs < ALLOC_MIN_LONGS) {
+ prerror("Region '%s' %s %p (%s) size %zu\n",
+ region->name, hdr->free ? "free" : "alloc",
+ hdr, hdr_location(hdr),
+ hdr->num_longs * sizeof(long));
+ return false;
+ }
+ if ((unsigned long)hdr + hdr->num_longs * sizeof(long) >
+ region->start + region->len) {
+ prerror("Region '%s' %s %p (%s) oversize %zu\n",
+ region->name, hdr->free ? "free" : "alloc",
+ hdr, hdr_location(hdr),
+ hdr->num_longs * sizeof(long));
+ return false;
+ }
+ if (hdr->free) {
+ if (hdr->prev_free || prev_free) {
+ prerror("Region '%s' free %p (%s) has prev_free"
+ " %p (%s) %sset?\n",
+ region->name, hdr, hdr_location(hdr),
+ prev_free,
+ prev_free ? hdr_location(prev_free)
+ : "NULL",
+ hdr->prev_free ? "" : "un");
+ return false;
+ }
+ prev_free = hdr;
+ frees ^= (unsigned long)hdr - region->start;
+ } else {
+ if (hdr->prev_free != (bool)prev_free) {
+ prerror("Region '%s' alloc %p (%s) has"
+ " prev_free %p %sset?\n",
+ region->name, hdr, hdr_location(hdr),
+ prev_free, hdr->prev_free ? "" : "un");
+ return false;
+ }
+ prev_free = NULL;
+ }
+ }
+
+ /* Now walk free list. */
+ list_for_each(&region->free_list, f, list)
+ frees ^= (unsigned long)f - region->start;
+
+ if (frees) {
+ prerror("Region '%s' free list and walk do not match!\n",
+ region->name);
+ return false;
+ }
+ return true;
+}
+
+static struct mem_region *new_region(const char *name,
+ uint64_t start, uint64_t len,
+ struct dt_node *mem_node,
+ enum mem_region_type type)
+{
+ struct mem_region *region;
+
+ /* Avoid lock recursion, call mem_alloc directly. */
+ region = mem_alloc(&skiboot_heap,
+ sizeof(*region), __alignof__(*region), __location__);
+ if (!region)
+ return NULL;
+
+ region->name = name;
+ region->start = start;
+ region->len = len;
+ region->mem_node = mem_node;
+ region->type = type;
+ region->free_list.n.next = NULL;
+
+ return region;
+}
+
+/* We always split regions, so we only have to replace one. */
+static struct mem_region *split_region(struct mem_region *head,
+ uint64_t split_at,
+ enum mem_region_type type)
+{
+ struct mem_region *tail;
+ uint64_t end = head->start + head->len;
+
+ tail = new_region(head->name, split_at, end - split_at,
+ head->mem_node, type);
+ /* Original region becomes head. */
+ if (tail)
+ head->len -= tail->len;
+
+ return tail;
+}
+
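+/* True only if addr falls strictly inside the region: an address landing
+ * exactly on a boundary requires no split, hence the strict comparisons.
+ */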
+static bool intersects(const struct mem_region *region, uint64_t addr)
+{
+ return addr > region->start &&
+ addr < region->start + region->len;
+}
+
+static bool maybe_split(struct mem_region *r, uint64_t split_at)
+{
+ struct mem_region *tail;
+
+ if (!intersects(r, split_at))
+ return true;
+
+ tail = split_region(r, split_at, r->type);
+ if (!tail)
+ return false;
+
+ /* Tail add is important: we may need to split again! */
+ list_add_tail(&regions, &tail->list);
+ return true;
+}
+
+static bool overlaps(const struct mem_region *r1, const struct mem_region *r2)
+{
+ return (r1->start + r1->len > r2->start
+ && r1->start < r2->start + r2->len);
+}
+
+static struct mem_region *get_overlap(const struct mem_region *region)
+{
+ struct mem_region *i;
+
+ list_for_each(&regions, i, list) {
+ if (overlaps(region, i))
+ return i;
+ }
+ return NULL;
+}
+
+static bool add_region(struct mem_region *region)
+{
+ struct mem_region *r;
+
+ /* First split any regions which intersect. */
+ list_for_each(&regions, r, list)
+ if (!maybe_split(r, region->start) ||
+ !maybe_split(r, region->start + region->len))
+ return false;
+
+ /* Now we have only whole overlaps, if any. */
+ while ((r = get_overlap(region)) != NULL) {
+ assert(r->start == region->start);
+ assert(r->len == region->len);
+ list_del_from(&regions, &r->list);
+ /* We already hold mem_region lock */
+ mem_free(&skiboot_heap, r, __location__);
+ }
+
+ /* Finally, add in our own region. */
+ list_add(&regions, &region->list);
+ return true;
+}
+
+void mem_reserve(const char *name, uint64_t start, uint64_t len)
+{
+ struct mem_region *region;
+ bool added;
+
+ lock(&mem_region_lock);
+ region = new_region(name, start, len, NULL, REGION_RESERVED);
+ assert(region);
+ added = add_region(region);
+ assert(added);
+ unlock(&mem_region_lock);
+}
+
+static bool matches_chip_id(const __be32 ids[], size_t num, u32 chip_id)
+{
+ size_t i;
+
+ for (i = 0; i < num; i++)
+ if (be32_to_cpu(ids[i]) == chip_id)
+ return true;
+
+ return false;
+}
+
+void *__local_alloc(unsigned int chip_id, size_t size, size_t align,
+ const char *location)
+{
+ struct mem_region *region;
+ void *p = NULL;
+ bool use_local = true;
+
+ lock(&mem_region_lock);
+
+restart:
+ list_for_each(&regions, region, list) {
+ const struct dt_property *prop;
+ const __be32 *ids;
+
+ if (region->type != REGION_SKIBOOT_HEAP)
+ continue;
+
+ /* Don't allocate from normal heap. */
+ if (region == &skiboot_heap)
+ continue;
+
+ /* First pass, only match node local regions */
+ if (use_local) {
+ if (!region->mem_node)
+ continue;
+ prop = dt_find_property(region->mem_node, "ibm,chip-id");
+ ids = (const __be32 *)prop->prop;
+ if (!matches_chip_id(ids, prop->len/sizeof(u32),
+ chip_id))
+ continue;
+ }
+
+		/* Try the allocation (on the second pass, any heap region will do) */
+ p = mem_alloc(region, size, align, location);
+ if (p)
+ break;
+ }
+
+ /*
+ * If we can't allocate the memory block from the expected
+	 * node, we fall back to any region that can accommodate our request.
+ */
+ if (!p && use_local) {
+ use_local = false;
+ goto restart;
+ }
+
+ unlock(&mem_region_lock);
+
+ return p;
+}
+
+struct mem_region *find_mem_region(const char *name)
+{
+ struct mem_region *region;
+
+ list_for_each(&regions, region, list) {
+ if (streq(region->name, name))
+ return region;
+ }
+ return NULL;
+}
+
+/* Trawl through device tree, create memory regions from nodes. */
+void mem_region_init(void)
+{
+ const struct dt_property *names, *ranges;
+ struct mem_region *region;
+ struct dt_node *i;
+
+ /* Ensure we have no collision between skiboot core and our heap */
+ extern char _end[];
+ BUILD_ASSERT(HEAP_BASE >= (uint64_t)_end);
+
+ /*
+ * Add associativity properties outside of the lock
+ * to avoid recursive locking caused by allocations
+ * done by add_chip_dev_associativity()
+ */
+ dt_for_each_node(dt_root, i) {
+ if (!dt_has_node_property(i, "device_type", "memory"))
+ continue;
+
+ /* Add associativity properties */
+ add_chip_dev_associativity(i);
+ }
+
+ /* Add each memory node. */
+ dt_for_each_node(dt_root, i) {
+ uint64_t start, len;
+ char *rname;
+#define NODE_REGION_PREFIX "ibm,firmware-allocs-"
+
+ if (!dt_has_node_property(i, "device_type", "memory"))
+ continue;
+ rname = zalloc(strlen(i->name) + strlen(NODE_REGION_PREFIX) + 1);
+ strcat(rname, NODE_REGION_PREFIX);
+ strcat(rname, i->name);
+ start = dt_get_address(i, 0, &len);
+ lock(&mem_region_lock);
+ region = new_region(rname, start, len, i, REGION_SKIBOOT_HEAP);
+ if (!region) {
+ prerror("MEM: Could not add mem region %s!\n", i->name);
+ abort();
+ }
+ list_add(&regions, &region->list);
+ unlock(&mem_region_lock);
+ }
+
+ /* Now we know how many CPU stacks we have, fix that up. */
+ skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE;
+
+ lock(&mem_region_lock);
+
+ /* Now carve out our own reserved areas. */
+ if (!add_region(&skiboot_os_reserve) ||
+ !add_region(&skiboot_code_and_text) ||
+ !add_region(&skiboot_heap) ||
+ !add_region(&skiboot_after_heap) ||
+ !add_region(&skiboot_cpu_stacks)) {
+ prerror("Out of memory adding skiboot reserved areas\n");
+ abort();
+ }
+
+ /* Add reserved ranges from the DT */
+ names = dt_find_property(dt_root, "reserved-names");
+ ranges = dt_find_property(dt_root, "reserved-ranges");
+ if (names && ranges) {
+ const uint64_t *range;
+ int n, len;
+
+ range = (const void *)ranges->prop;
+
+ for (n = 0; n < names->len; n += len, range += 2) {
+ char *name;
+
+ len = strlen(names->prop + n) + 1;
+
+ name = mem_alloc(&skiboot_heap, len,
+ __alignof__(*name), __location__);
+ memcpy(name, names->prop + n, len);
+
+ region = new_region(name,
+ dt_get_number(range, 2),
+ dt_get_number(range + 1, 2),
+ NULL, REGION_RESERVED);
+ list_add(&regions, &region->list);
+ }
+ } else if (names || ranges) {
+ prerror("Invalid properties: reserved-names=%p "
+ "with reserved-ranges=%p\n",
+ names, ranges);
+ abort();
+ }
+
+ unlock(&mem_region_lock);
+
+ /* We generate the reservation properties from our own region list,
+ * which now includes the existing data.
+ */
+ if (names)
+ dt_del_property(dt_root, (struct dt_property *)names);
+ if (ranges)
+ dt_del_property(dt_root, (struct dt_property *)ranges);
+}
+
+static uint64_t allocated_length(const struct mem_region *r)
+{
+ struct free_hdr *f, *last = NULL;
+
+ /* No allocations at all? */
+ if (r->free_list.n.next == NULL)
+ return 0;
+
+ /* Find last free block. */
+ list_for_each(&r->free_list, f, list)
+ if (f > last)
+ last = f;
+
+ /* No free blocks? */
+ if (!last)
+ return r->len;
+
+ /* Last free block isn't at end? */
+ if (next_hdr(r, &last->hdr))
+ return r->len;
+ return (unsigned long)last - r->start;
+}
+
+/* Separate out allocated sections into their own region. */
+void mem_region_release_unused(void)
+{
+ struct mem_region *r;
+
+ lock(&mem_region_lock);
+
+ printf("Releasing unused memory:\n");
+ list_for_each(&regions, r, list) {
+ uint64_t used_len;
+
+ /* If it's not allocatable, ignore it. */
+ if (r->type != REGION_SKIBOOT_HEAP)
+ continue;
+
+ used_len = allocated_length(r);
+
+ printf(" %s: %llu/%llu used\n",
+ r->name, (long long)used_len, (long long)r->len);
+
+ /* We keep the skiboot heap. */
+ if (r == &skiboot_heap)
+ continue;
+
+ /* Nothing used? Whole thing is for Linux. */
+ if (used_len == 0)
+ r->type = REGION_OS;
+ /* Partially used? Split region. */
+ else if (used_len != r->len) {
+ struct mem_region *for_linux;
+ struct free_hdr *last = region_start(r) + used_len;
+
+ /* Remove the final free block. */
+ list_del_from(&r->free_list, &last->list);
+
+ for_linux = split_region(r, r->start + used_len,
+ REGION_OS);
+ if (!for_linux) {
+ prerror("OOM splitting mem node %s for linux\n",
+ r->name);
+ abort();
+ }
+ list_add(&regions, &for_linux->list);
+ }
+ }
+ unlock(&mem_region_lock);
+}
+
+void mem_region_add_dt_reserved(void)
+{
+ int names_len, ranges_len, len;
+ struct mem_region *region;
+ void *names, *ranges;
+ uint64_t *range;
+ char *name;
+
+ names_len = 0;
+ ranges_len = 0;
+
+ lock(&mem_region_lock);
+
+ /* First pass: calculate length of property data */
+ list_for_each(&regions, region, list) {
+ if (!region_is_reserved(region))
+ continue;
+ names_len += strlen(region->name) + 1;
+ ranges_len += 2 * sizeof(uint64_t);
+ }
+
+ /* Allocate property data with mem_alloc; malloc() acquires
+ * mem_region_lock */
+ names = mem_alloc(&skiboot_heap, names_len,
+ __alignof__(*names), __location__);
+ ranges = mem_alloc(&skiboot_heap, ranges_len,
+ __alignof__(*ranges), __location__);
+
+ name = names;
+ range = ranges;
+
+ printf("Reserved regions:\n");
+ /* Second pass: populate property data */
+ list_for_each(&regions, region, list) {
+ if (!region_is_reserved(region))
+ continue;
+ len = strlen(region->name) + 1;
+ memcpy(name, region->name, len);
+ name += len;
+
+ printf(" 0x%012llx..%012llx : %s\n",
+ (long long)region->start,
+ (long long)(region->start + region->len - 1),
+ region->name);
+
+ range[0] = cpu_to_fdt64(region->start);
+ range[1] = cpu_to_fdt64(region->len);
+ range += 2;
+ }
+ unlock(&mem_region_lock);
+
+ dt_add_property(dt_root, "reserved-names", names, names_len);
+ dt_add_property(dt_root, "reserved-ranges", ranges, ranges_len);
+
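+	/* dt_add_property() copies the property data, so the temporary
+	 * buffers can be freed again.
+	 */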
+ free(names);
+ free(ranges);
+}
diff --git a/core/nvram.c b/core/nvram.c
new file mode 100644
index 0000000..f25d6aa
--- /dev/null
+++ b/core/nvram.c
@@ -0,0 +1,248 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <fsp.h>
+#include <opal.h>
+#include <lock.h>
+#include <device.h>
+#include <platform.h>
+
+static void *nvram_image;
+static uint32_t nvram_size;
+static bool nvram_ready;
+
+static int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset)
+{
+ if (!nvram_ready)
+ return OPAL_HARDWARE;
+ if (offset >= nvram_size || (offset + size) > nvram_size)
+ return OPAL_PARAMETER;
+
+ memcpy((void *)buffer, nvram_image + offset, size);
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_READ_NVRAM, opal_read_nvram, 3);
+
+static int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset)
+{
+ if (!nvram_ready)
+ return OPAL_HARDWARE;
+ if (offset >= nvram_size || (offset + size) > nvram_size)
+ return OPAL_PARAMETER;
+ memcpy(nvram_image + offset, (void *)buffer, size);
+ if (platform.nvram_write)
+ platform.nvram_write(offset, nvram_image + offset, size);
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_WRITE_NVRAM, opal_write_nvram, 3);
+
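+/* CHRP-style NVRAM partition header. "len" is the partition length in
+ * 16-byte blocks and includes this header.
+ */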
+struct chrp_nvram_hdr {
+ uint8_t sig;
+ uint8_t cksum;
+ uint16_t len;
+ char name[12];
+};
+
+#define NVRAM_SIG_FW_PRIV 0x51
+#define NVRAM_SIG_SYSTEM 0x70
+#define NVRAM_SIG_FREE 0x7f
+
+#define NVRAM_NAME_COMMON "common"
+#define NVRAM_NAME_FW_PRIV "ibm,skiboot"
+#define NVRAM_NAME_FREE "wwwwwwwwwwww"
+
+/* 64k should be enough, famous last words... */
+#define NVRAM_SIZE_COMMON 0x10000
+
+/* 4k should be enough, famous last words... */
+#define NVRAM_SIZE_FW_PRIV 0x1000
+
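+/* Standard CHRP partition checksum: a byte-wise sum with end-around carry,
+ * computed over the header with the cksum field itself zeroed.
+ */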
+static uint8_t chrp_nv_cksum(struct chrp_nvram_hdr *hdr)
+{
+ struct chrp_nvram_hdr h_copy = *hdr;
+ uint8_t b_data, i_sum, c_sum;
+ uint8_t *p = (uint8_t *)&h_copy;
+ unsigned int nbytes = sizeof(h_copy);
+
+ h_copy.cksum = 0;
+ for (c_sum = 0; nbytes; nbytes--) {
+ b_data = *(p++);
+ i_sum = c_sum + b_data;
+ if (i_sum < c_sum)
+ i_sum++;
+ c_sum = i_sum;
+ }
+ return c_sum;
+}
+
+static void nvram_format(void)
+{
+ struct chrp_nvram_hdr *h;
+ unsigned int offset = 0;
+
+ prerror("NVRAM: Re-initializing\n");
+ memset(nvram_image, 0, nvram_size);
+
+ /* Create private partition */
+ h = nvram_image + offset;
+ h->sig = NVRAM_SIG_FW_PRIV;
+ h->len = NVRAM_SIZE_FW_PRIV >> 4;
+ strcpy(h->name, NVRAM_NAME_FW_PRIV);
+ h->cksum = chrp_nv_cksum(h);
+ offset += NVRAM_SIZE_FW_PRIV;
+
+ /* Create common partition */
+ h = nvram_image + offset;
+ h->sig = NVRAM_SIG_SYSTEM;
+ h->len = NVRAM_SIZE_COMMON >> 4;
+ strcpy(h->name, NVRAM_NAME_COMMON);
+ h->cksum = chrp_nv_cksum(h);
+ offset += NVRAM_SIZE_COMMON;
+
+ /* Create free space partition */
+ h = nvram_image + offset;
+ h->sig = NVRAM_SIG_FREE;
+ h->len = (nvram_size - offset) >> 4;
+ strncpy(h->name, NVRAM_NAME_FREE, 12);
+ h->cksum = chrp_nv_cksum(h);
+
+ /* Write the whole thing back */
+ if (platform.nvram_write)
+ platform.nvram_write(0, nvram_image, nvram_size);
+}
+
+/*
+ * Check that the nvram partition layout is sane and that it
+ * contains our required partitions. If not, we re-format the
+ * lot of it
+ */
+static void nvram_check(void)
+{
+ unsigned int offset = 0;
+ bool found_common = false;
+ bool found_skiboot = false;
+
+ while (offset + sizeof(struct chrp_nvram_hdr) < nvram_size) {
+ struct chrp_nvram_hdr *h = nvram_image + offset;
+
+ if (chrp_nv_cksum(h) != h->cksum) {
+ prerror("NVRAM: Partition at offset 0x%x"
+ " has bad checksum\n", offset);
+ goto failed;
+ }
+ if (h->len < 1) {
+ prerror("NVRAM: Partition at offset 0x%x"
+ " has incorrect 0 length\n", offset);
+ goto failed;
+ }
+
+ if (h->sig == NVRAM_SIG_SYSTEM &&
+ strcmp(h->name, NVRAM_NAME_COMMON) == 0)
+ found_common = true;
+
+ if (h->sig == NVRAM_SIG_FW_PRIV &&
+ strcmp(h->name, NVRAM_NAME_FW_PRIV) == 0)
+ found_skiboot = true;
+
+ offset += h->len << 4;
+ if (offset > nvram_size) {
+ prerror("NVRAM: Partition at offset 0x%x"
+ " extends beyond end of nvram !\n", offset);
+ goto failed;
+ }
+ }
+ if (!found_common) {
+ prerror("NVRAM: Common partition not found !\n");
+ goto failed;
+ }
+ if (!found_skiboot) {
+ prerror("NVRAM: Skiboot private partition "
+ "not found !\n");
+ goto failed;
+ }
+
+ prerror("NVRAM: Layout appears sane\n");
+ return;
+ failed:
+ nvram_format();
+}
+
+void nvram_read_complete(bool success)
+{
+ struct dt_node *np;
+
+ /* Read not successful, error out and free the buffer */
+ if (!success) {
+ free(nvram_image);
+ nvram_size = 0;
+ return;
+ }
+
+ /* Check and maybe format nvram */
+ nvram_check();
+
+ /* Add nvram node */
+ np = dt_new(opal_node, "nvram");
+ dt_add_property_cells(np, "#bytes", nvram_size);
+ dt_add_property_string(np, "compatible", "ibm,opal-nvram");
+
+ /* Mark ready */
+ nvram_ready = true;
+}
+
+void nvram_init(void)
+{
+ int rc;
+
+ if (!platform.nvram_info)
+ return;
+ rc = platform.nvram_info(&nvram_size);
+ if (rc) {
+ prerror("NVRAM: Error %d retrieving nvram info\n", rc);
+ return;
+ }
+ printf("NVRAM: Size is %d KB\n", nvram_size >> 10);
+ if (nvram_size > 0x100000) {
+ printf("NVRAM: Cropping to 1MB !\n");
+ nvram_size = 0x100000;
+ }
+
+ /*
+ * We allocate the nvram image with 4k alignment to make the
+	 * FSP backend's job easier
+ */
+ nvram_image = memalign(0x1000, nvram_size);
+ if (!nvram_image) {
+ prerror("NVRAM: Failed to allocate nvram image\n");
+ nvram_size = 0;
+ return;
+ }
+
+ /* Read it in */
+ rc = platform.nvram_start_read(nvram_image, 0, nvram_size);
+ if (rc) {
+ prerror("NVRAM: Failed to read NVRAM from FSP !\n");
+ nvram_size = 0;
+ free(nvram_image);
+ return;
+ }
+
+ /*
+ * We'll get called back later (or recursively from
+ * nvram_start_read) in nvram_read_complete()
+ */
+}
diff --git a/core/opal-msg.c b/core/opal-msg.c
new file mode 100644
index 0000000..f033b76
--- /dev/null
+++ b/core/opal-msg.c
@@ -0,0 +1,167 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <skiboot.h>
+#include <opal-msg.h>
+#include <lock.h>
+
+#define OPAL_MAX_MSGS (OPAL_MSG_TYPE_MAX + OPAL_MAX_ASYNC_COMP - 1)
+#define OPAL_MSG_PREFIX "opalmsg: "
+
+
+struct opal_msg_entry {
+ struct list_node link;
+ void (*consumed)(void *data);
+ void *data;
+ struct opal_msg msg;
+};
+
+static LIST_HEAD(msg_free_list);
+static LIST_HEAD(msg_pending_list);
+
+static struct lock opal_msg_lock = LOCK_UNLOCKED;
+
+int _opal_queue_msg(enum OpalMessageType msg_type, void *data,
+ void (*consumed)(void *data), size_t num_params,
+ const u64 *params)
+{
+ struct opal_msg_entry *entry;
+
+ lock(&opal_msg_lock);
+
+ entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+ if (!entry) {
+ prerror(OPAL_MSG_PREFIX "No available node in the free list, allocating\n");
+ entry = zalloc(sizeof(struct opal_msg_entry));
+ if (!entry) {
+ prerror(OPAL_MSG_PREFIX "Allocation failed\n");
+ unlock(&opal_msg_lock);
+ return OPAL_RESOURCE;
+ }
+ }
+
+ entry->consumed = consumed;
+ entry->data = data;
+ entry->msg.msg_type = msg_type;
+
+ if (num_params > ARRAY_SIZE(entry->msg.params)) {
+ prerror(OPAL_MSG_PREFIX "Discarding extra parameters\n");
+ num_params = ARRAY_SIZE(entry->msg.params);
+ }
+ memcpy(entry->msg.params, params, num_params*sizeof(u64));
+
+ list_add_tail(&msg_pending_list, &entry->link);
+ opal_update_pending_evt(OPAL_EVENT_MSG_PENDING,
+ OPAL_EVENT_MSG_PENDING);
+
+ unlock(&opal_msg_lock);
+
+ return 0;
+}
+
+static int64_t opal_get_msg(uint64_t *buffer, uint64_t size)
+{
+ struct opal_msg_entry *entry;
+ void (*callback)(void *data);
+ void *data;
+
+ if (size < sizeof(struct opal_msg) || !buffer)
+ return OPAL_PARAMETER;
+
+ lock(&opal_msg_lock);
+
+ entry = list_pop(&msg_pending_list, struct opal_msg_entry, link);
+ if (!entry) {
+ unlock(&opal_msg_lock);
+ return OPAL_RESOURCE;
+ }
+
+ memcpy(buffer, &entry->msg, sizeof(entry->msg));
+ callback = entry->consumed;
+ data = entry->data;
+
+ list_add(&msg_free_list, &entry->link);
+ if (list_empty(&msg_pending_list))
+ opal_update_pending_evt(OPAL_EVENT_MSG_PENDING, 0);
+
+ unlock(&opal_msg_lock);
+
+ if (callback)
+ callback(data);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_GET_MSG, opal_get_msg, 2);
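+
+/*
+ * Sketch of the expected flow (not tied to any one caller): a producer
+ * inside skiboot queues a message via _opal_queue_msg() (usually through
+ * a wrapper), which raises OPAL_EVENT_MSG_PENDING. The host sees the
+ * event from opal_poll_events() and drains the queue with OPAL_GET_MSG,
+ * at which point the optional "consumed" callback runs so the producer
+ * can release whatever data was associated with the message.
+ */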
+
+static int64_t opal_check_completion(uint64_t *buffer, uint64_t size,
+ uint64_t token)
+{
+ struct opal_msg_entry *entry, *next_entry;
+ void (*callback)(void *data) = NULL;
+ int rc = OPAL_BUSY;
+ void *data = NULL;
+
+ lock(&opal_msg_lock);
+ list_for_each_safe(&msg_pending_list, entry, next_entry, link) {
+ if (entry->msg.msg_type == OPAL_MSG_ASYNC_COMP &&
+ entry->msg.params[0] == token) {
+ list_del(&entry->link);
+ callback = entry->consumed;
+ data = entry->data;
+ list_add(&msg_free_list, &entry->link);
+ if (list_empty(&msg_pending_list))
+ opal_update_pending_evt(OPAL_EVENT_MSG_PENDING,
+ 0);
+ rc = OPAL_SUCCESS;
+ break;
+ }
+ }
+
+ if (rc == OPAL_SUCCESS && size >= sizeof(struct opal_msg))
+ memcpy(buffer, &entry->msg, sizeof(entry->msg));
+
+ unlock(&opal_msg_lock);
+
+ if (callback)
+ callback(data);
+
+ return rc;
+
+}
+opal_call(OPAL_CHECK_ASYNC_COMPLETION, opal_check_completion, 3);
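+
+/*
+ * For async completions, the token passed by the host is carried in
+ * params[0] of the OPAL_MSG_ASYNC_COMP message, which is how the lookup
+ * above matches a pending completion without draining the whole queue.
+ */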
+
+void opal_init_msg(void)
+{
+ struct opal_msg_entry *entry;
+ int i;
+
+	for (i = 0; i < OPAL_MAX_MSGS; i++) {
+ entry = zalloc(sizeof(*entry));
+ if (!entry)
+ goto err;
+ list_add_tail(&msg_free_list, &entry->link);
+ }
+ return;
+
+err:
+ for (; i > 0; i--) {
+ entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+ if (entry)
+ free(entry);
+ }
+}
+
diff --git a/core/opal.c b/core/opal.c
new file mode 100644
index 0000000..2727fd5
--- /dev/null
+++ b/core/opal.c
@@ -0,0 +1,308 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <opal.h>
+#include <stack.h>
+#include <lock.h>
+#include <fsp.h>
+#include <cpu.h>
+#include <interrupts.h>
+#include <op-panel.h>
+#include <device.h>
+#include <console.h>
+#include <trace.h>
+#include <timebase.h>
+#include <affinity.h>
+#include <opal-msg.h>
+
+/* Pending events to signal via opal_poll_events */
+uint64_t opal_pending_events;
+
+/* OPAL dispatch table defined in head.S */
+extern uint64_t opal_branch_table[];
+
+/* Number of args expected for each call. */
+static u8 opal_num_args[OPAL_LAST+1];
+
+/* OPAL anchor node */
+struct dt_node *opal_node;
+
+extern uint32_t attn_trigger;
+extern uint32_t hir_trigger;
+
+void opal_table_init(void)
+{
+ struct opal_table_entry *s = __opal_table_start;
+ struct opal_table_entry *e = __opal_table_end;
+
+ printf("OPAL table: %p .. %p, branch table: %p\n",
+ s, e, opal_branch_table);
+ while(s < e) {
+ uint64_t *func = s->func;
+ opal_branch_table[s->token] = *func;
+ opal_num_args[s->token] = s->nargs;
+ s++;
+ }
+}
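+
+/*
+ * Note: "*func" above assumes the ELFv1 function-descriptor ABI, where a
+ * function pointer refers to a descriptor whose first doubleword is the
+ * actual entry address; that address is what gets stored in the branch
+ * table consumed by the OPAL entry code in head.S.
+ */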
+
+/* Called from head.S, thus no prototype */
+long opal_bad_token(uint64_t token);
+
+long opal_bad_token(uint64_t token)
+{
+ prerror("OPAL: Called with bad token %lld !\n", token);
+
+ return OPAL_PARAMETER;
+}
+
+/* Called from head.S, thus no prototype */
+void opal_trace_entry(struct stack_frame *eframe);
+
+/* FIXME: Do this in asm */
+void opal_trace_entry(struct stack_frame *eframe)
+{
+ union trace t;
+ unsigned nargs;
+
+ if (this_cpu()->pir != mfspr(SPR_PIR)) {
+ printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x\n",
+ mfspr(SPR_PIR), this_cpu(), this_cpu()->pir);
+ abort();
+ }
+ if (eframe->gpr[0] > OPAL_LAST)
+ nargs = 0;
+ else
+ nargs = opal_num_args[eframe->gpr[0]];
+
+ t.opal.token = eframe->gpr[0];
+ t.opal.lr = eframe->lr;
+ t.opal.sp = eframe->gpr[1];
+ memcpy(t.opal.r3_to_11, &eframe->gpr[3], nargs*sizeof(u64));
+
+ trace_add(&t, TRACE_OPAL, offsetof(struct trace_opal, r3_to_11[nargs]));
+}
+
+void __opal_register(uint64_t token, void *func, unsigned int nargs)
+{
+ uint64_t *opd = func;
+
+ assert(token <= OPAL_LAST);
+
+ opal_branch_table[token] = *opd;
+ opal_num_args[token] = nargs;
+}
+
+static void add_opal_firmware_node(void)
+{
+ struct dt_node *firmware = dt_new(opal_node, "firmware");
+
+ dt_add_property_string(firmware, "compatible", "ibm,opal-firmware");
+ dt_add_property_string(firmware, "name", "firmware");
+ dt_add_property_string(firmware, "git-id", gitid);
+}
+
+void add_opal_node(void)
+{
+ uint64_t base, entry, size;
+ extern uint32_t opal_entry;
+
+ /* XXX TODO: Reorg this. We should create the base OPAL
+ * node early on, and have the various sub modules populate
+ * their own entries (console etc...)
+ *
+ * The logic of which console backend to use should be
+ * extracted
+ */
+
+ entry = (uint64_t)&opal_entry;
+ base = SKIBOOT_BASE;
+ size = (CPU_STACKS_BASE +
+ (cpu_max_pir + 1) * STACK_SIZE) - SKIBOOT_BASE;
+
+ opal_node = dt_new(dt_root, "ibm,opal");
+ dt_add_property_cells(opal_node, "#address-cells", 0);
+ dt_add_property_cells(opal_node, "#size-cells", 0);
+ dt_add_property_strings(opal_node, "compatible", "ibm,opal-v2",
+ "ibm,opal-v3");
+ dt_add_property_cells(opal_node, "opal-msg-async-num", OPAL_MAX_ASYNC_COMP);
+ dt_add_property_cells(opal_node, "opal-msg-size", sizeof(struct opal_msg));
+ dt_add_property_u64(opal_node, "opal-base-address", base);
+ dt_add_property_u64(opal_node, "opal-entry-address", entry);
+ dt_add_property_u64(opal_node, "opal-runtime-size", size);
+
+ add_opal_firmware_node();
+ add_associativity_ref_point();
+ memcons_add_properties();
+ add_cpu_idle_state_properties();
+}
+
+void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values)
+{
+ static struct lock evt_lock = LOCK_UNLOCKED;
+ uint64_t new_evts;
+
+ /* XXX FIXME: Use atomics instead ??? Or caller locks (con_lock ?) */
+ lock(&evt_lock);
+ new_evts = (opal_pending_events & ~evt_mask) | evt_values;
+#ifdef OPAL_TRACE_EVT_CHG
+ printf("OPAL: Evt change: 0x%016llx -> 0x%016llx\n",
+ opal_pending_events, new_evts);
+#endif
+ opal_pending_events = new_evts;
+ unlock(&evt_lock);
+}
+
+
+static uint64_t opal_test_func(uint64_t arg)
+{
+ printf("OPAL: Test function called with arg 0x%llx\n", arg);
+
+ return 0xfeedf00d;
+}
+opal_call(OPAL_TEST, opal_test_func, 1);
+
+struct opal_poll_entry {
+ struct list_node link;
+ void (*poller)(void *data);
+ void *data;
+};
+
+static struct list_head opal_pollers = LIST_HEAD_INIT(opal_pollers);
+static struct lock opal_poll_lock = LOCK_UNLOCKED;
+
+void opal_add_poller(void (*poller)(void *data), void *data)
+{
+ struct opal_poll_entry *ent;
+
+ ent = zalloc(sizeof(struct opal_poll_entry));
+ assert(ent);
+ ent->poller = poller;
+ ent->data = data;
+ lock(&opal_poll_lock);
+ list_add_tail(&opal_pollers, &ent->link);
+ unlock(&opal_poll_lock);
+}
+
+void opal_del_poller(void (*poller)(void *data))
+{
+ struct opal_poll_entry *ent;
+
+ lock(&opal_poll_lock);
+ list_for_each(&opal_pollers, ent, link) {
+ if (ent->poller == poller) {
+ list_del(&ent->link);
+ free(ent);
+ break;
+ }
+ }
+ unlock(&opal_poll_lock);
+}
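+
+/*
+ * Pollers registered here are run from opal_poll_events() below each time
+ * the host calls OPAL_POLL_EVENTS, giving drivers a regular context in
+ * which to make progress without relying on interrupts.
+ */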
+
+static int64_t opal_poll_events(uint64_t *outstanding_event_mask)
+{
+ struct opal_poll_entry *poll_ent;
+
+ /* Check if we need to trigger an attn for test use */
+ if (attn_trigger == 0xdeadbeef) {
+ printf("Triggering attn\n");
+ assert(false);
+ }
+
+ /* Test the host initiated reset */
+ if (hir_trigger == 0xdeadbeef) {
+ fsp_trigger_reset();
+ hir_trigger = 0;
+ }
+
+ /*
+ * Only run the pollers if they aren't already running
+ * on another CPU
+ */
+ if (try_lock(&opal_poll_lock)) {
+ list_for_each(&opal_pollers, poll_ent, link)
+ poll_ent->poller(poll_ent->data);
+ unlock(&opal_poll_lock);
+ }
+
+ if (outstanding_event_mask)
+ *outstanding_event_mask = opal_pending_events;
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_POLL_EVENTS, opal_poll_events, 1);
+
+static int64_t opal_check_token(uint64_t token)
+{
+ if (token > OPAL_LAST)
+ return OPAL_TOKEN_ABSENT;
+
+ if (opal_branch_table[token])
+ return OPAL_TOKEN_PRESENT;
+
+ return OPAL_TOKEN_ABSENT;
+}
+opal_call(OPAL_CHECK_TOKEN, opal_check_token, 1);
+
+struct opal_sync_entry {
+ struct list_node link;
+ bool (*notify)(void *data);
+ void *data;
+};
+
+static struct list_head opal_syncers = LIST_HEAD_INIT(opal_syncers);
+
+void opal_add_host_sync_notifier(bool (*notify)(void *data), void *data)
+{
+ struct opal_sync_entry *ent;
+
+ ent = zalloc(sizeof(struct opal_sync_entry));
+ assert(ent);
+ ent->notify = notify;
+ ent->data = data;
+ list_add_tail(&opal_syncers, &ent->link);
+}
+
+void opal_del_host_sync_notifier(bool (*notify)(void *data))
+{
+ struct opal_sync_entry *ent;
+
+ list_for_each(&opal_syncers, ent, link) {
+ if (ent->notify == notify) {
+ list_del(&ent->link);
+ free(ent);
+ return;
+ }
+ }
+}
+
+/*
+ * OPAL call to handle host kexec'ing scenario
+ */
+static int64_t opal_sync_host_reboot(void)
+{
+ struct opal_sync_entry *ent;
+ bool ret = true;
+
+ list_for_each(&opal_syncers, ent, link)
+ ret &= ent->notify(ent->data);
+
+ if (ret)
+ return OPAL_SUCCESS;
+ else
+ return OPAL_BUSY_EVENT;
+}
+opal_call(OPAL_SYNC_HOST_REBOOT, opal_sync_host_reboot, 0);
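+
+/*
+ * While any notifier still has work outstanding this returns
+ * OPAL_BUSY_EVENT, so the host is expected to keep calling
+ * OPAL_SYNC_HOST_REBOOT (polling events in between) until it gets
+ * OPAL_SUCCESS back.
+ */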
diff --git a/core/pci-opal.c b/core/pci-opal.c
new file mode 100644
index 0000000..ee534cc
--- /dev/null
+++ b/core/pci-opal.c
@@ -0,0 +1,666 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <timebase.h>
+#include <lock.h>
+
+#define OPAL_PCICFG_ACCESS(op, cb, type) \
+static int64_t opal_pci_config_##op(uint64_t phb_id, \
+ uint64_t bus_dev_func, \
+ uint64_t offset, type data) \
+{ \
+ struct phb *phb = pci_get_phb(phb_id); \
+ int64_t rc; \
+ \
+ if (!phb) \
+ return OPAL_PARAMETER; \
+ phb->ops->lock(phb); \
+ rc = phb->ops->cfg_##cb(phb, bus_dev_func, offset, data); \
+ phb->ops->unlock(phb); \
+ pci_put_phb(phb); \
+ \
+ return rc; \
+}
+
+OPAL_PCICFG_ACCESS(read_byte, read8, uint8_t *)
+OPAL_PCICFG_ACCESS(read_half_word, read16, uint16_t *)
+OPAL_PCICFG_ACCESS(read_word, read32, uint32_t *)
+OPAL_PCICFG_ACCESS(write_byte, write8, uint8_t)
+OPAL_PCICFG_ACCESS(write_half_word, write16, uint16_t)
+OPAL_PCICFG_ACCESS(write_word, write32, uint32_t)
+
+opal_call(OPAL_PCI_CONFIG_READ_BYTE, opal_pci_config_read_byte, 4);
+opal_call(OPAL_PCI_CONFIG_READ_HALF_WORD, opal_pci_config_read_half_word, 4);
+opal_call(OPAL_PCI_CONFIG_READ_WORD, opal_pci_config_read_word, 4);
+opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, opal_pci_config_write_byte, 4);
+opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, opal_pci_config_write_half_word, 4);
+opal_call(OPAL_PCI_CONFIG_WRITE_WORD, opal_pci_config_write_word, 4);
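+
+/*
+ * For reference, each accessor generated above is a thin wrapper: e.g.
+ * opal_pci_config_read_byte(phb_id, bus_dev_func, offset, data) looks up
+ * the PHB, takes its lock, forwards to phb->ops->cfg_read8() and drops
+ * the reference before returning the backend's result.
+ */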
+
+static int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number,
+ uint8_t *freeze_state,
+ uint16_t *pci_error_type,
+ uint64_t *phb_status)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->eeh_freeze_status)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->eeh_freeze_status(phb, pe_number, freeze_state,
+ pci_error_type, NULL, phb_status);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_EEH_FREEZE_STATUS, opal_pci_eeh_freeze_status, 5);
+
+static int64_t opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number,
+ uint64_t eeh_action_token)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->eeh_freeze_clear)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->eeh_freeze_clear(phb, pe_number, eeh_action_token);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, opal_pci_eeh_freeze_clear, 3);
+
+static int64_t opal_pci_phb_mmio_enable(uint64_t phb_id, uint16_t window_type,
+ uint16_t window_num, uint16_t enable)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->phb_mmio_enable)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->phb_mmio_enable(phb, window_type, window_num, enable);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_PHB_MMIO_ENABLE, opal_pci_phb_mmio_enable, 4);
+
+static int64_t opal_pci_set_phb_mem_window(uint64_t phb_id,
+ uint16_t window_type,
+ uint16_t window_num,
+ uint64_t addr,
+ uint64_t pci_addr,
+ uint64_t size)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_phb_mem_window)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->set_phb_mem_window(phb, window_type, window_num,
+ addr, pci_addr, size);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PHB_MEM_WINDOW, opal_pci_set_phb_mem_window, 6);
+
+static int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number,
+ uint16_t window_type,
+ uint16_t window_num,
+ uint16_t segment_num)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->map_pe_mmio_window)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->map_pe_mmio_window(phb, pe_number, window_type,
+ window_num, segment_num);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, opal_pci_map_pe_mmio_window, 5);
+
+static int64_t opal_pci_set_phb_table_memory(uint64_t phb_id __unused,
+ uint64_t rtt_addr __unused,
+ uint64_t ivt_addr __unused,
+ uint64_t ivt_len __unused,
+ uint64_t rej_array_addr __unused,
+ uint64_t peltv_addr __unused)
+{
+ /* IODA2 (P8) stuff, TODO */
+ return OPAL_UNSUPPORTED;
+}
+opal_call(OPAL_PCI_SET_PHB_TABLE_MEMORY, opal_pci_set_phb_table_memory, 6);
+
+static int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number,
+ uint64_t bus_dev_func, uint8_t bus_compare,
+ uint8_t dev_compare, uint8_t func_compare,
+ uint8_t pe_action)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_pe)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->set_pe(phb, pe_number, bus_dev_func, bus_compare,
+ dev_compare, func_compare, pe_action);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PE, opal_pci_set_pe, 7);
+
+static int64_t opal_pci_set_peltv(uint64_t phb_id, uint32_t parent_pe,
+ uint32_t child_pe, uint8_t state)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_peltv)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->set_peltv(phb, parent_pe, child_pe, state);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PELTV, opal_pci_set_peltv, 4);
+
+static int64_t opal_pci_set_mve(uint64_t phb_id, uint32_t mve_number,
+ uint32_t pe_number)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_mve)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->set_mve(phb, mve_number, pe_number);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_MVE, opal_pci_set_mve, 3);
+
+static int64_t opal_pci_set_mve_enable(uint64_t phb_id, uint32_t mve_number,
+ uint32_t state)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_mve_enable)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->set_mve_enable(phb, mve_number, state);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_MVE_ENABLE, opal_pci_set_mve_enable, 3);
+
+static int64_t opal_pci_get_xive_reissue(uint64_t phb_id __unused,
+ uint32_t xive_number __unused,
+ uint8_t *p_bit __unused,
+ uint8_t *q_bit __unused)
+{
+ /* IODA2 (P8) stuff, TODO */
+ return OPAL_UNSUPPORTED;
+}
+opal_call(OPAL_PCI_GET_XIVE_REISSUE, opal_pci_get_xive_reissue, 4);
+
+static int64_t opal_pci_set_xive_reissue(uint64_t phb_id __unused,
+ uint32_t xive_number __unused,
+ uint8_t p_bit __unused,
+ uint8_t q_bit __unused)
+{
+ /* IODA2 (P8) stuff, TODO */
+ return OPAL_UNSUPPORTED;
+}
+opal_call(OPAL_PCI_SET_XIVE_REISSUE, opal_pci_set_xive_reissue, 4);
+
+static int64_t opal_pci_msi_eoi(uint64_t phb_id,
+ uint32_t hwirq)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->pci_msi_eoi)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->pci_msi_eoi(phb, hwirq);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MSI_EOI, opal_pci_msi_eoi, 2);
+
+static int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number,
+ uint32_t xive_num)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_xive_pe)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->set_xive_pe(phb, pe_number, xive_num);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_XIVE_PE, opal_pci_set_xive_pe, 3);
+
+static int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num,
+ int32_t *interrupt_source_number)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_xive_source)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->get_xive_source(phb, xive_num, interrupt_source_number);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_GET_XIVE_SOURCE, opal_get_xive_source, 3);
+
+static int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number,
+ uint32_t xive_num, uint8_t msi_range,
+ uint32_t *msi_address, uint32_t *message_data)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_msi_32)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->get_msi_32(phb, mve_number, xive_num, msi_range,
+ msi_address, message_data);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_GET_MSI_32, opal_get_msi_32, 6);
+
+static int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number,
+ uint32_t xive_num, uint8_t msi_range,
+ uint64_t *msi_address, uint32_t *message_data)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_msi_64)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->get_msi_64(phb, mve_number, xive_num, msi_range,
+ msi_address, message_data);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_GET_MSI_64, opal_get_msi_64, 6);
+
+static int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number,
+ uint16_t window_id,
+ uint16_t tce_levels,
+ uint64_t tce_table_addr,
+ uint64_t tce_table_size,
+ uint64_t tce_page_size)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->map_pe_dma_window)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->map_pe_dma_window(phb, pe_number, window_id,
+ tce_levels, tce_table_addr,
+ tce_table_size, tce_page_size);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW, opal_pci_map_pe_dma_window, 7);
+
+	phbs[phb->opal_id] = NULL;
+ uint16_t pe_number,
+ uint16_t window_id,
+ uint64_t pci_start_addr,
+ uint64_t pci_mem_size)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->map_pe_dma_window_real)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->map_pe_dma_window_real(phb, pe_number, window_id,
+ pci_start_addr, pci_mem_size);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, opal_pci_map_pe_dma_window_real, 5);
+
+static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope,
+ uint8_t assert_state)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc = OPAL_SUCCESS;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops)
+ return OPAL_UNSUPPORTED;
+ if (assert_state != OPAL_ASSERT_RESET &&
+ assert_state != OPAL_DEASSERT_RESET)
+ return OPAL_PARAMETER;
+
+ phb->ops->lock(phb);
+
+ switch(reset_scope) {
+ case OPAL_RESET_PHB_COMPLETE:
+ if (!phb->ops->complete_reset) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+ rc = phb->ops->complete_reset(phb, assert_state);
+ if (rc < 0)
+ prerror("PHB#%d: Failure on complete reset, rc=%lld\n",
+ phb->opal_id, rc);
+ break;
+ case OPAL_RESET_PCI_FUNDAMENTAL:
+ if (!phb->ops->fundamental_reset) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+		/* Nothing to do at deassert time */
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+
+ rc = phb->ops->fundamental_reset(phb);
+ if (rc < 0)
+ prerror("PHB#%d: Failure on fundamental reset, rc=%lld\n",
+ phb->opal_id, rc);
+ break;
+ case OPAL_RESET_PCI_HOT:
+ if (!phb->ops->hot_reset) {
+ rc = OPAL_UNSUPPORTED;
+ break;
+ }
+
+		/* Nothing to do at deassert time */
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+
+ rc = phb->ops->hot_reset(phb);
+ if (rc < 0)
+ prerror("PHB#%d: Failure on hot reset, rc=%lld\n",
+ phb->opal_id, rc);
+ break;
+ case OPAL_RESET_PCI_IODA_TABLE:
+ if (assert_state != OPAL_ASSERT_RESET)
+ break;
+ if (phb->ops->ioda_reset)
+ phb->ops->ioda_reset(phb, true);
+ break;
+ default:
+ rc = OPAL_UNSUPPORTED;
+ }
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return (rc > 0) ? tb_to_msecs(rc) : rc;
+}
+opal_call(OPAL_PCI_RESET, opal_pci_reset, 3);
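+
+/*
+ * As with OPAL_PCI_POLL below, a positive return from the backend is a
+ * timebase delay; it is converted to milliseconds so the caller knows
+ * how long to wait before polling the PHB again.
+ */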
+
+static int64_t opal_pci_reinit(uint64_t phb_id,
+ uint64_t reinit_scope,
+ uint64_t data)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops || !phb->ops->pci_reinit)
+ return OPAL_UNSUPPORTED;
+
+ phb->ops->lock(phb);
+ rc = phb->ops->pci_reinit(phb, reinit_scope, data);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_REINIT, opal_pci_reinit, 3);
+
+static int64_t opal_pci_poll(uint64_t phb_id)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops || !phb->ops->poll)
+ return OPAL_UNSUPPORTED;
+
+ phb->ops->lock(phb);
+ rc = phb->ops->poll(phb);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ /* Return milliseconds for caller to sleep: round up */
+ if (rc > 0) {
+ rc = tb_to_msecs(rc);
+ if (rc == 0)
+ rc = 1;
+ }
+
+ return rc;
+}
+opal_call(OPAL_PCI_POLL, opal_pci_poll, 1);
+
+static int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id,
+ uint64_t tce_mem_addr,
+ uint64_t tce_mem_size)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_phb_tce_memory)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->set_phb_tce_memory(phb, tce_mem_addr, tce_mem_size);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_SET_PHB_TCE_MEMORY, opal_pci_set_phb_tce_memory, 3);
+
+static int64_t opal_pci_get_phb_diag_data(uint64_t phb_id,
+ void *diag_buffer,
+ uint64_t diag_buffer_len)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_diag_data)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->get_diag_data(phb, diag_buffer, diag_buffer_len);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_GET_PHB_DIAG_DATA, opal_pci_get_phb_diag_data, 3);
+
+static int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id,
+ void *diag_buffer,
+ uint64_t diag_buffer_len)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->get_diag_data2)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->get_diag_data2(phb, diag_buffer, diag_buffer_len);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_GET_PHB_DIAG_DATA2, opal_pci_get_phb_diag_data2, 3);
+
+static int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe,
+ uint16_t *pci_error_type, uint16_t *severity)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->next_error)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+
+ /* Any call to this function clears the error event */
+ opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0);
+ rc = phb->ops->next_error(phb, first_frozen_pe, pci_error_type,
+ severity);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_NEXT_ERROR, opal_pci_next_error, 4);
+
+static int64_t opal_pci_eeh_freeze_status2(uint64_t phb_id, uint64_t pe_number,
+ uint8_t *freeze_state,
+ uint16_t *pci_error_type,
+ uint16_t *severity,
+ uint64_t *phb_status)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->eeh_freeze_status)
+ return OPAL_UNSUPPORTED;
+ phb->ops->lock(phb);
+ rc = phb->ops->eeh_freeze_status(phb, pe_number, freeze_state,
+ pci_error_type, severity, phb_status);
+ phb->ops->unlock(phb);
+ pci_put_phb(phb);
+
+ return rc;
+}
+opal_call(OPAL_PCI_EEH_FREEZE_STATUS2, opal_pci_eeh_freeze_status2, 6);
+
+static int64_t opal_pci_set_phb_capi_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number)
+{
+ struct phb *phb = pci_get_phb(phb_id);
+ int64_t rc;
+
+ if (!phb)
+ return OPAL_PARAMETER;
+ if (!phb->ops->set_capi_mode)
+ return OPAL_UNSUPPORTED;
+ if (mode == 1) {
+ phb->ops->lock(phb);
+ rc = phb->ops->set_capi_mode(phb, mode, pe_number);
+ phb->ops->unlock(phb);
+ return rc;
+ }
+ if (mode == 0) {
+		/* FIXME: Add support for PCI mode */
+ }
+ return OPAL_UNSUPPORTED;
+}
+opal_call(OPAL_PCI_SET_PHB_CAPI_MODE, opal_pci_set_phb_capi_mode, 3);
diff --git a/core/pci.c b/core/pci.c
new file mode 100644
index 0000000..f07908b
--- /dev/null
+++ b/core/pci.c
@@ -0,0 +1,1388 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <pci.h>
+#include <pci-cfg.h>
+#include <timebase.h>
+#include <lock.h>
+#include <device.h>
+
+static struct lock pci_lock = LOCK_UNLOCKED;
+#define PCI_MAX_PHBs 64
+static struct phb *phbs[PCI_MAX_PHBs];
+
+#define DBG(fmt...) do { } while(0)
+
+/*
+ * Generic PCI utilities
+ */
+
+/* pci_find_cap - Find a PCI capability in a device config space
+ *
+ * This will return a config space offset (positive) or a negative
+ * error (OPAL error codes).
+ *
+ * OPAL_UNSUPPORTED is returned if the capability doesn't exist
+ */
+int64_t pci_find_cap(struct phb *phb, uint16_t bdfn, uint8_t want)
+{
+ int64_t rc;
+ uint16_t stat, cap;
+ uint8_t pos, next;
+
+ rc = pci_cfg_read16(phb, bdfn, PCI_CFG_STAT, &stat);
+ if (rc)
+ return rc;
+ if (!(stat & PCI_CFG_STAT_CAP))
+ return OPAL_UNSUPPORTED;
+ rc = pci_cfg_read8(phb, bdfn, PCI_CFG_CAP, &pos);
+ if (rc)
+ return rc;
+ pos &= 0xfc;
+ while(pos) {
+ rc = pci_cfg_read16(phb, bdfn, pos, &cap);
+ if (rc)
+ return rc;
+ if ((cap & 0xff) == want)
+ return pos;
+ next = (cap >> 8) & 0xfc;
+ if (next == pos) {
+ prerror("PHB%d: dev %04x pci_find_cap hit a loop !\n",
+ phb->opal_id, bdfn);
+ break;
+ }
+ pos = next;
+ }
+ return OPAL_UNSUPPORTED;
+}
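+
+/*
+ * Typical use (as in pci_scan_one() below): pci_find_cap(phb, bdfn,
+ * PCI_CFG_CAP_ID_EXP) returns the config-space offset of the PCI Express
+ * capability when positive, or OPAL_UNSUPPORTED for a legacy device.
+ */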
+
+/* pci_find_ecap - Find a PCIe extended capability in a device
+ * config space
+ *
+ * This will return a config space offset (positive) or a negative
+ * error (OPAL error code). Additionally, if the "version" argument
+ * is non-NULL, the capability version will be returned there.
+ *
+ * OPAL_UNSUPPORTED is returned if the capability doesn't exist
+ */
+int64_t pci_find_ecap(struct phb *phb, uint16_t bdfn, uint16_t want,
+ uint8_t *version)
+{
+ int64_t rc;
+ uint32_t cap;
+ uint16_t off, prev = 0;
+
+ for (off = 0x100; off && off < 0x1000; off = (cap >> 20) & 0xffc ) {
+ if (off == prev) {
+ prerror("PHB%d: dev %04x pci_find_ecap hit a loop !\n",
+ phb->opal_id, bdfn);
+ break;
+ }
+ prev = off;
+ rc = pci_cfg_read32(phb, bdfn, off, &cap);
+ if (rc)
+ return rc;
+ if ((cap & 0xffff) == want) {
+ if (version)
+ *version = (cap >> 16) & 0xf;
+ return off;
+ }
+ }
+ return OPAL_UNSUPPORTED;
+}
+
+static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *parent,
+ uint16_t bdfn)
+{
+ struct pci_device *pd = NULL;
+ uint32_t retries, vdid, val;
+ int64_t rc, ecap;
+ uint8_t htype;
+ uint16_t capreg;
+ bool had_crs = false;
+
+ for (retries = 40; retries; retries--) {
+ rc = pci_cfg_read32(phb, bdfn, 0, &vdid);
+ if (rc)
+ return NULL;
+ if (vdid == 0xffffffff || vdid == 0x00000000)
+ return NULL;
+ if (vdid != 0xffff0001)
+ break;
+ had_crs = true;
+ time_wait_ms(100);
+ }
+ if (vdid == 0xffff0001) {
+ prerror("PCI: Device %04x CRS timeout !\n", bdfn);
+ return NULL;
+ }
+ if (had_crs)
+ printf("PCI: Device %04x replied after CRS\n", bdfn);
+ pd = zalloc(sizeof(struct pci_device));
+ if (!pd) {
+ prerror("PCI: Failed to allocate structure pci_device !\n");
+ goto fail;
+ }
+ pd->bdfn = bdfn;
+ pd->parent = parent;
+ list_head_init(&pd->children);
+ rc = pci_cfg_read8(phb, bdfn, PCI_CFG_HDR_TYPE, &htype);
+ if (rc) {
+ prerror("PCI: Failed to read header type !\n");
+ goto fail;
+ }
+ pd->is_multifunction = !!(htype & 0x80);
+ pd->is_bridge = (htype & 0x7f) != 0;
+ pd->scan_map = 0xffffffff; /* Default */
+
+ ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP);
+ if (ecap > 0) {
+ pci_set_cap(pd, PCI_CFG_CAP_ID_EXP, ecap, false);
+ pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_CAPABILITY_REG,
+ &capreg);
+ pd->dev_type = GETFIELD(PCICAP_EXP_CAP_TYPE, capreg);
+
+ /*
+ * XXX We observe a problem on some PLX switches where one
+ * of the downstream ports appears as an upstream port, we
+ * fix that up here otherwise, other code will misbehave
+ */
+ if (pd->parent && pd->dev_type == PCIE_TYPE_SWITCH_UPPORT &&
+ pd->parent->dev_type == PCIE_TYPE_SWITCH_UPPORT &&
+ vdid == 0x874810b5) {
+ prerror("PCI: Fixing up bad PLX downstream port !\n");
+ pd->dev_type = PCIE_TYPE_SWITCH_DNPORT;
+ }
+
+ /* XXX Handle ARI */
+ if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT ||
+ pd->dev_type == PCIE_TYPE_ROOT_PORT)
+ pd->scan_map = 0x1;
+
+ /* Read MPS capability, whose maximal size is 4096 */
+ pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_DEVCAP, &val);
+ pd->mps = (128 << GETFIELD(PCICAP_EXP_DEVCAP_MPSS, val));
+ if (pd->mps > 4096)
+ pd->mps = 4096;
+ } else {
+ pd->dev_type = PCIE_TYPE_LEGACY;
+ }
+
+ /* If it's a bridge, sanitize the bus numbers to avoid forwarding
+ *
+ * This will help when walking down those bridges later on
+ */
+ if (pd->is_bridge) {
+ pci_cfg_write8(phb, bdfn, PCI_CFG_PRIMARY_BUS, bdfn >> 8);
+ pci_cfg_write8(phb, bdfn, PCI_CFG_SECONDARY_BUS, 0);
+ pci_cfg_write8(phb, bdfn, PCI_CFG_SUBORDINATE_BUS, 0);
+ }
+
+ /* XXX Need to do some basic setups, such as MPSS, MRS,
+ * RCB, etc...
+ */
+
+ printf("PCI: Device %04x VID:%04x DEV:%04x TYP:%d MF%s BR%s EX%s\n",
+ bdfn, vdid & 0xffff, vdid >> 16, pd->dev_type,
+ pd->is_multifunction ? "+" : "-",
+ pd->is_bridge ? "+" : "-",
+ pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) ? "+" : "-");
+
+ /*
+ * Call PHB hook
+ */
+ if (phb->ops->device_init)
+ phb->ops->device_init(phb, pd);
+
+ return pd;
+ fail:
+ if (pd)
+ free(pd);
+ return NULL;
+}
+
+/* pci_check_clear_freeze - Probing empty slot will result in an EEH
+ * freeze. Currently we have a single PE mapping
+ * everything (default state of our backend) so
+ * we just check and clear the state of PE#0
+ *
+ * NOTE: We currently only handle simple PE freeze, not PHB fencing
+ * (or rather our backend does)
+ */
+static void pci_check_clear_freeze(struct phb *phb)
+{
+ int64_t rc;
+ uint8_t freeze_state;
+ uint16_t pci_error_type, sev;
+
+ rc = phb->ops->eeh_freeze_status(phb, 0, &freeze_state,
+ &pci_error_type, &sev, NULL);
+ if (rc)
+ return;
+ if (freeze_state == OPAL_EEH_STOPPED_NOT_FROZEN)
+ return;
+ /* We can't handle anything worse than an ER here */
+ if (sev > OPAL_EEH_SEV_NO_ERROR &&
+ sev < OPAL_EEH_SEV_PE_ER) {
+ prerror("PCI: PHB%d fatal probe error !\n", phb->opal_id);
+ return;
+ }
+ phb->ops->eeh_freeze_clear(phb, 0, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+}
+
+/* pci_enable_bridge - Called before scanning a bridge
+ *
+ * Ensures error flags are clean, disable master abort, and
+ * check if the subordinate bus isn't reset, the slot is enabled
+ * on PCIe, etc...
+ */
+static bool pci_enable_bridge(struct phb *phb, struct pci_device *pd)
+{
+ uint16_t bctl;
+ bool was_reset = false;
+ int64_t ecap = 0;
+
+ /* Disable master aborts, clear errors */
+ pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &bctl);
+ bctl &= ~PCI_CFG_BRCTL_MABORT_REPORT;
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl);
+
+ /* PCI-E bridge, check the slot state */
+ if (pd->dev_type == PCIE_TYPE_ROOT_PORT ||
+ pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
+ uint16_t slctl, slcap, slsta, lctl;
+
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+
+ /* Read the slot status & check for presence detect */
+ pci_cfg_read16(phb, pd->bdfn, ecap+PCICAP_EXP_SLOTSTAT, &slsta);
+ DBG(" slstat=%04x\n", slsta);
+ if (!(slsta & PCICAP_EXP_SLOTSTAT_PDETECTST)) {
+ printf("PCI: No card in slot\n");
+ return false;
+ }
+
+ /* Read the slot capabilities */
+ pci_cfg_read16(phb, pd->bdfn, ecap+PCICAP_EXP_SLOTCAP, &slcap);
+ DBG(" slcap=%04x\n", slcap);
+ if (!(slcap & PCICAP_EXP_SLOTCAP_PWCTRL))
+ goto power_is_on;
+
+ /* Read the slot control register, check if the slot is off */
+ pci_cfg_read16(phb, pd->bdfn, ecap+PCICAP_EXP_SLOTCTL, &slctl);
+ DBG(" slctl=%04x\n", slctl);
+ if (!(slctl & PCICAP_EXP_SLOTCTL_PWRCTLR))
+ goto power_is_on;
+
+ /* Turn power on
+ *
+ * XXX This is a "command", we should wait for it to complete
+ * etc... but just waiting 2s will do for now
+ */
+ DBG("PCI: Bridge power is off, turning on ...\n");
+ slctl &= ~PCICAP_EXP_SLOTCTL_PWRCTLR;
+ slctl |= SETFIELD(PCICAP_EXP_SLOTCTL_PWRI, 0, PCIE_INDIC_ON);
+ pci_cfg_write16(phb, pd->bdfn, ecap+PCICAP_EXP_SLOTCTL, slctl);
+
+ /* Wait a couple of seconds */
+ time_wait_ms(2000);
+
+ power_is_on:
+ /* Enable link */
+ pci_cfg_read16(phb, pd->bdfn, ecap+PCICAP_EXP_LCTL, &lctl);
+ DBG(" lctl=%04x\n", lctl);
+ lctl &= ~PCICAP_EXP_LCTL_LINK_DIS;
+ pci_cfg_write16(phb, pd->bdfn, ecap+PCICAP_EXP_LCTL, lctl);
+ }
+
+ /* Clear secondary reset */
+ if (bctl & PCI_CFG_BRCTL_SECONDARY_RESET) {
+ printf("PCI: Bridge secondary reset is on, clearing it ...\n");
+ bctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl);
+ time_wait_ms(1000);
+ was_reset = true;
+ }
+
+ /* PCI-E bridge, wait for link */
+ if (pd->dev_type == PCIE_TYPE_ROOT_PORT ||
+ pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) {
+ uint32_t lcap;
+
+ /* Read link caps */
+ pci_cfg_read32(phb, pd->bdfn, ecap+PCICAP_EXP_LCAP, &lcap);
+
+ /* Did link capability say we got reporting ?
+ *
+ * If yes, wait up to 10s, if not, wait 1s if we didn't already
+ */
+ if (lcap & PCICAP_EXP_LCAP_DL_ACT_REP) {
+ uint32_t retries = 100;
+ uint16_t lstat;
+
+ printf("%016lx: waiting for link... \n", mftb());
+
+ while(retries--) {
+ pci_cfg_read16(phb, pd->bdfn,
+ ecap+PCICAP_EXP_LSTAT, &lstat);
+ if (lstat & PCICAP_EXP_LSTAT_DLLL_ACT)
+ break;
+ time_wait_ms(100);
+ }
+ printf("%016lx: end wait for link...\n", mftb());
+ if (!(lstat & PCICAP_EXP_LSTAT_DLLL_ACT)) {
+ prerror("PCI: Bridge %04x, timeout waiting"
+ " for downstream link\n", pd->bdfn);
+ return false;
+ }
+ /* Need to wait another 100ms before touching
+ * the config space
+ */
+ time_wait_ms(100);
+ } else if (!was_reset)
+ time_wait_ms(1000);
+ }
+
+ /* Clear error status */
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_STAT, 0xffff);
+
+ return true;
+}
+
+/* Clear up bridge resources */
+static void pci_cleanup_bridge(struct phb *phb, struct pci_device *pd)
+{
+ uint16_t cmd;
+
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_BASE_U16, 0xffff);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_BASE, 0xf0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_LIMIT_U16, 0);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_LIMIT, 0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_BASE, 0xfff0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_LIMIT, 0);
+ pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE_U32, 0xffffffff);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE, 0xfff0);
+ pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT_U32, 0);
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT, 0);
+
+ /* Note: This is a bit fishy but since we have closed all the
+ * bridge windows above, it shouldn't be a problem. Basically
+ * we enable Memory, IO and Bus Master on the bridge because
+ * some versions of Linux will fail to do it themselves.
+ */
+ pci_cfg_read16(phb, pd->bdfn, PCI_CFG_CMD, &cmd);
+ cmd |= PCI_CFG_CMD_IO_EN | PCI_CFG_CMD_MEM_EN;
+ cmd |= PCI_CFG_CMD_BUS_MASTER_EN;
+ pci_cfg_write16(phb, pd->bdfn, PCI_CFG_CMD, cmd);
+}
+
+
+/* pci_scan - Perform a recursive scan of the bus at bus_number
+ * populating the list passed as an argument. This also
+ * performs the bus numbering, so it returns the largest
+ * bus number that was assigned.
+ *
+ * Note: Eventually this might want to access some VPD information
+ * in order to know what slots to scan and what not etc..
+ *
+ * XXX NOTE: We might want to enable ARI along the way...
+ *
+ * XXX NOTE: We might also want to setup the PCIe MPS/MRSS properly
+ * here as Linux may or may not do it
+ */
+static uint8_t pci_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
+ struct list_head *list, struct pci_device *parent,
+ bool scan_downstream)
+{
+ struct pci_device *pd = NULL;
+ uint8_t dev, fn, next_bus, max_sub, save_max;
+ uint32_t scan_map;
+
+ /* Decide what to scan */
+ scan_map = parent ? parent->scan_map : phb->scan_map;
+
+ /* Do scan */
+ for (dev = 0; dev < 32; dev++) {
+ if (!(scan_map & (1ul << dev)))
+ continue;
+
+ /* Scan the device */
+ pd = pci_scan_one(phb, parent, (bus << 8) | (dev << 3));
+ pci_check_clear_freeze(phb);
+ if (!pd)
+ continue;
+
+ /* Get slot info if any */
+ if (platform.pci_get_slot_info)
+ platform.pci_get_slot_info(phb, pd);
+
+ /* Link it up */
+ list_add_tail(list, &pd->link);
+
+ /* XXX Handle ARI */
+ if (!pd->is_multifunction)
+ continue;
+ for (fn = 1; fn < 8; fn++) {
+ pd = pci_scan_one(phb, parent,
+ ((uint16_t)bus << 8) | (dev << 3) | fn);
+ pci_check_clear_freeze(phb);
+ if (pd) {
+ if (platform.pci_get_slot_info)
+ platform.pci_get_slot_info(phb, pd);
+ list_add_tail(list, &pd->link);
+ }
+ }
+ }
+
+ /*
+ * We only scan downstream if instructed to do so by the
+ * caller. Typically we avoid the scan when we know the
+ * link is down already, which happens for the top level
+ * root complex, and avoids a long secondary timeout
+ */
+ if (!scan_downstream)
+ return bus;
+
+ next_bus = bus + 1;
+ max_sub = bus;
+ save_max = max_bus;
+
+ /* Scan down bridges */
+ list_for_each(list, pd, link) {
+ bool use_max, do_scan;
+
+ if (!pd->is_bridge)
+ continue;
+
+ /* We need to figure out a new bus number to start from.
+ *
+ * This can be tricky due to our HW constraints which differ
+ * from bridge to bridge so we are going to let the phb
+ * driver decide what to do. This can return us a maximum
+ * bus number to assign as well
+ *
+ * This function will:
+ *
+ * - Return the bus number to use as secondary for the
+ * bridge or 0 for a failure
+ *
+ * - "max_bus" will be adjusted to represent the max
+ * subordinate that can be associated with the downstream
+ * device
+ *
+ * - "use_max" will be set to true if the returned max_bus
+ * *must* be used as the subordinate bus number of that
+	 *    bridge (when we need to give aligned powers of two
+	 *    on P7IOC). If it is set to false, we just adjust the
+ * subordinate bus number based on what we probed.
+ *
+ */
+ max_bus = save_max;
+ next_bus = phb->ops->choose_bus(phb, pd, next_bus,
+ &max_bus, &use_max);
+
+ /* Configure the bridge with the returned values */
+ if (next_bus <= bus) {
+ printf("PCI: Bridge %04x, out of bus numbers !\n",
+ pd->bdfn);
+ max_bus = next_bus = 0; /* Failure case */
+ }
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, next_bus);
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_bus);
+ if (!next_bus)
+ break;
+
+ printf("PCI: Bridge %04x, bus: %02x..%02x %s scanning...\n",
+ pd->bdfn, next_bus, max_bus, use_max ? "[use max]" : "");
+
+ /* Clear up bridge resources */
+ pci_cleanup_bridge(phb, pd);
+
+ /* Configure the bridge. This will enable power to the slot
+ * if it's currently disabled, lift reset, etc...
+ *
+ * Return false if we know there's nothing behind the bridge
+ */
+ do_scan = pci_enable_bridge(phb, pd);
+
+ /* Perform recursive scan */
+ if (do_scan) {
+ max_sub = pci_scan(phb, next_bus, max_bus,
+ &pd->children, pd, true);
+ } else if (!use_max) {
+ /* XXX Empty bridge... we leave room for hotplug
+ * slots etc.. but we should be smarter at figuring
+ * out if this is actually a hotpluggable one
+ */
+ max_sub = next_bus + 4;
+ if (max_sub > max_bus)
+ max_sub = max_bus;
+ }
+
+ /* Update the max subordinate as described previously */
+ if (use_max)
+ max_sub = max_bus;
+ pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_sub);
+ next_bus = max_sub + 1;
+ }
+
+ return max_sub;
+}
+
+static int pci_get_mps(struct phb *phb,
+ struct pci_device *pd, void *userdata)
+{
+ uint32_t *mps = (uint32_t *)userdata;
+
+	/* Only check PCI devices that have an MPS capability */
+ if (phb && pd && pd->mps && *mps > pd->mps)
+ *mps = pd->mps;
+
+ return 0;
+}
+
+static int __pci_configure_mps(struct phb *phb,
+ struct pci_device *pd,
+ void *userdata __unused)
+{
+	uint32_t ecap, mps;
+	uint16_t val;
+
+	if (!phb || !pd)
+		return 0;
+
+	/* If the MPS isn't an acceptable value, bail immediately */
+	mps = phb->mps;
+	if (mps < 128 || mps > 4096)
+		return 1;
+
+	/* A PCIe device always has an MPS capability */
+ if (pd->mps) {
+ ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+ mps = ilog2(mps) - 7;
+
+ pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, &val);
+ val = SETFIELD(PCICAP_EXP_DEVCTL_MPS, val, mps);
+ pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, val);
+ }
+
+ return 0;
+}
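+
+/*
+ * MPS encoding note: the Device Control MPS field holds log2(size) - 7,
+ * which is what "ilog2(mps) - 7" computes above. So 128 bytes encodes as
+ * 0, 256 as 1, and 4096 as 5; the inverse, 128 << field, is used when
+ * reading DEVCAP in pci_scan_one().
+ */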
+
+int32_t pci_configure_mps(struct phb *phb, struct pci_device *pd)
+{
+ return __pci_configure_mps(phb, pd, NULL);
+}
+
+/*
+ * Check the slot power state. If power is already on, issue a
+ * fundamental reset. Otherwise, power the slot on first; the
+ * power-on sequence performs the fundamental reset.
+ */
+static int64_t pci_reset_phb(struct phb *phb)
+{
+ const char *desc;
+ int64_t rc;
+
+ rc = phb->ops->power_state(phb);
+ if (rc < 0) {
+ printf("PHB%d: Failed to get power state, rc=%lld\n",
+ phb->opal_id, rc);
+ return rc;
+ }
+
+ if (rc == OPAL_SHPC_POWER_ON) {
+ desc = "fundamental reset";
+ rc = phb->ops->fundamental_reset(phb);
+ } else {
+ desc = "power on";
+ rc = phb->ops->slot_power_on(phb);
+ }
+
+ if (rc < 0) {
+ /* Don't warn if it's just an empty slot */
+ if (rc != OPAL_CLOSED)
+ printf("PHB%d: Failed to %s, rc=%lld\n",
+ phb->opal_id, desc, rc);
+ return rc;
+ }
+
+	/* Wait for the internal state machine to complete */
+ while (rc > 0) {
+ time_wait(rc);
+ rc = phb->ops->poll(phb);
+ }
+ if (rc < 0)
+ printf("PHB%d: Failed to %s, rc=%lld\n",
+ phb->opal_id, desc, rc);
+
+ return rc;
+}
+
+static void pci_init_slot(struct phb *phb)
+{
+ uint32_t mps = 0xffffffff;
+ int64_t rc;
+ bool has_link;
+
+ printf("PHB%d: Init slot\n", phb->opal_id);
+
+ /*
+ * For PCI/PCI-X, we get the slot info and we also
+ * check if the PHB has anything connected to it
+ */
+ if (phb->phb_type < phb_type_pcie_v1) {
+ if (platform.pci_get_slot_info)
+ platform.pci_get_slot_info(phb, NULL);
+ rc = phb->ops->presence_detect(phb);
+ if (rc != OPAL_SHPC_DEV_PRESENT) {
+ printf("PHB%d: Slot empty\n", phb->opal_id);
+ return;
+ }
+ }
+
+ /*
+	 * Power on the PHB; the PHB is reset in a fundamental
+	 * way while powering on. The reset state machine will
+	 * wait for the link to come up.
+ */
+ rc = pci_reset_phb(phb);
+ if (rc && rc != OPAL_CLOSED)
+ return;
+
+ /* It's up, print some things */
+ rc = phb->ops->link_state(phb);
+ if (rc < 0) {
+ printf("PHB%d: Failed to query link state, rc=%lld\n",
+ phb->opal_id, rc);
+ return;
+ }
+ has_link = rc != OPAL_SHPC_LINK_DOWN;
+
+	if (!has_link)
+ printf("PHB%d: Link down\n", phb->opal_id);
+ else if (phb->phb_type >= phb_type_pcie_v1)
+ printf("PHB%d: Link up at x%lld width\n", phb->opal_id, rc);
+
+ printf("PHB%d: Scanning (upstream%s)...\n", phb->opal_id,
+		has_link ? "+downstream" : " only");
+ pci_scan(phb, 0, 0xff, &phb->devices, NULL, has_link);
+
+	/* Configure MPS (Max Payload Size) for the PCIe domain */
+ pci_walk_dev(phb, pci_get_mps, &mps);
+ phb->mps = mps;
+ pci_walk_dev(phb, __pci_configure_mps, NULL);
+}
+
+int64_t pci_register_phb(struct phb *phb)
+{
+ int64_t rc = OPAL_SUCCESS;
+ unsigned int i;
+
+ lock(&pci_lock);
+ for (i = 0; i < PCI_MAX_PHBs; i++)
+ if (!phbs[i])
+ break;
+ if (i >= PCI_MAX_PHBs) {
+ prerror("PHB: Failed to find a free ID slot\n");
+ rc = OPAL_RESOURCE;
+ } else {
+ phbs[i] = phb;
+ phb->opal_id = i;
+ dt_add_property_cells(phb->dt_node, "ibm,opal-phbid",
+ 0, phb->opal_id);
+ printf("PCI: Registered PHB ID %d\n", i);
+ }
+ list_head_init(&phb->devices);
+ unlock(&pci_lock);
+
+ return rc;
+}
+
+int64_t pci_unregister_phb(struct phb *phb)
+{
+ /* XXX We want some kind of RCU or RWlock to make things
+ * like that happen while no OPAL callback is in progress,
+ * that way we avoid taking a lock in each of them.
+ *
+ * Right now we don't unregister so we are fine
+ */
+ lock(&pci_lock);
+ phbs[phb->opal_id] = phb;
+ unlock(&pci_lock);
+
+ return OPAL_SUCCESS;
+}
+
+struct phb *pci_get_phb(uint64_t phb_id)
+{
+ if (phb_id >= PCI_MAX_PHBs)
+ return NULL;
+
+ /* XXX See comment in pci_unregister_phb() about locking etc... */
+ return phbs[phb_id];
+}
+
+static const char *pci_class_name(uint32_t class_code)
+{
+ uint8_t class = class_code >> 16;
+ uint8_t sub = (class_code >> 8) & 0xff;
+ uint8_t pif = class_code & 0xff;
+
+ switch(class) {
+ case 0x00:
+ switch(sub) {
+ case 0x00: return "device";
+ case 0x01: return "vga";
+ }
+ break;
+ case 0x01:
+ switch(sub) {
+ case 0x00: return "scsi";
+ case 0x01: return "ide";
+ case 0x02: return "fdc";
+ case 0x03: return "ipi";
+ case 0x04: return "raid";
+ case 0x05: return "ata";
+ case 0x06: return "sata";
+ case 0x07: return "sas";
+ default: return "mass-storage";
+ }
+ case 0x02:
+ switch(sub) {
+ case 0x00: return "ethernet";
+ case 0x01: return "token-ring";
+ case 0x02: return "fddi";
+ case 0x03: return "atm";
+ case 0x04: return "isdn";
+ case 0x05: return "worldfip";
+ case 0x06: return "picmg";
+ default: return "network";
+ }
+ case 0x03:
+ switch(sub) {
+ case 0x00: return "vga";
+ case 0x01: return "xga";
+ case 0x02: return "3d-controller";
+ default: return "display";
+ }
+ case 0x04:
+ switch(sub) {
+ case 0x00: return "video";
+ case 0x01: return "sound";
+ case 0x02: return "telephony";
+ default: return "multimedia-device";
+ }
+ case 0x05:
+ switch(sub) {
+ case 0x00: return "memory";
+ case 0x01: return "flash";
+ default: return "memory-controller";
+ }
+ case 0x06:
+ switch(sub) {
+ case 0x00: return "host";
+ case 0x01: return "isa";
+ case 0x02: return "eisa";
+ case 0x03: return "mca";
+ case 0x04: return "pci";
+ case 0x05: return "pcmcia";
+ case 0x06: return "nubus";
+ case 0x07: return "cardbus";
+ case 0x08: return "raceway";
+ case 0x09: return "semi-transparent-pci";
+ case 0x0a: return "infiniband";
+ default: return "unknown-bridge";
+ }
+ case 0x07:
+ switch(sub) {
+ case 0x00:
+ switch(pif) {
+ case 0x01: return "16450-serial";
+ case 0x02: return "16550-serial";
+ case 0x03: return "16650-serial";
+ case 0x04: return "16750-serial";
+ case 0x05: return "16850-serial";
+ case 0x06: return "16950-serial";
+ default: return "serial";
+ }
+ case 0x01:
+ switch(pif) {
+ case 0x01: return "bi-directional-parallel";
+ case 0x02: return "ecp-1.x-parallel";
+ case 0x03: return "ieee1284-controller";
+ case 0xfe: return "ieee1284-device";
+ default: return "parallel";
+ }
+ case 0x02: return "multiport-serial";
+ case 0x03:
+ switch(pif) {
+ case 0x01: return "16450-modem";
+ case 0x02: return "16550-modem";
+ case 0x03: return "16650-modem";
+ case 0x04: return "16750-modem";
+ default: return "modem";
+ }
+ case 0x04: return "gpib";
+ case 0x05: return "smart-card";
+ default: return "communication-controller";
+ }
+ case 0x08:
+ switch(sub) {
+ case 0x00:
+ switch(pif) {
+ case 0x01: return "isa-pic";
+ case 0x02: return "eisa-pic";
+ case 0x10: return "io-apic";
+ case 0x20: return "iox-apic";
+ default: return "interrupt-controller";
+ }
+ case 0x01:
+ switch(pif) {
+ case 0x01: return "isa-dma";
+ case 0x02: return "eisa-dma";
+ default: return "dma-controller";
+ }
+ case 0x02:
+ switch(pif) {
+ case 0x01: return "isa-system-timer";
+ case 0x02: return "eisa-system-timer";
+ default: return "timer";
+ }
+ case 0x03:
+ switch(pif) {
+ case 0x01: return "isa-rtc";
+ default: return "rtc";
+ }
+ case 0x04: return "hotplug-controller";
+ case 0x05: return "sd-host-controller";
+ default: return "system-peripheral";
+ }
+ case 0x09:
+ switch(sub) {
+ case 0x00: return "keyboard";
+ case 0x01: return "pen";
+ case 0x02: return "mouse";
+ case 0x03: return "scanner";
+ case 0x04: return "gameport";
+ default: return "input-controller";
+ }
+ case 0x0a:
+ switch(sub) {
+ case 0x00: return "clock";
+ default: return "docking-station";
+ }
+ case 0x0b:
+ switch(sub) {
+ case 0x00: return "386";
+ case 0x01: return "486";
+ case 0x02: return "pentium";
+ case 0x10: return "alpha";
+ case 0x20: return "powerpc";
+ case 0x30: return "mips";
+ case 0x40: return "co-processor";
+ default: return "cpu";
+ }
+ case 0x0c:
+ switch(sub) {
+ case 0x00: return "firewire";
+ case 0x01: return "access-bus";
+ case 0x02: return "ssa";
+ case 0x03:
+ switch(pif) {
+ case 0x00: return "usb-uhci";
+ case 0x10: return "usb-ohci";
+ case 0x20: return "usb-ehci";
+ case 0x30: return "usb-xhci";
+ case 0xfe: return "usb-device";
+ default: return "usb";
+ }
+ case 0x04: return "fibre-channel";
+ case 0x05: return "smb";
+ case 0x06: return "infiniband";
+ case 0x07:
+ switch(pif) {
+ case 0x00: return "impi-smic";
+ case 0x01: return "impi-kbrd";
+ case 0x02: return "impi-bltr";
+ default: return "impi";
+ }
+ case 0x08: return "secos";
+ case 0x09: return "canbus";
+ default: return "serial-bus";
+ }
+ case 0x0d:
+ switch(sub) {
+ case 0x00: return "irda";
+ case 0x01: return "consumer-ir";
+ case 0x10: return "rf-controller";
+ case 0x11: return "bluetooth";
+ case 0x12: return "broadband";
+ case 0x20: return "enet-802.11a";
+ case 0x21: return "enet-802.11b";
+ default: return "wireless-controller";
+ }
+ case 0x0e: return "intelligent-controller";
+ case 0x0f:
+ switch(sub) {
+ case 0x01: return "satellite-tv";
+ case 0x02: return "satellite-audio";
+ case 0x03: return "satellite-voice";
+ case 0x04: return "satellite-data";
+ default: return "satellite-device";
+ }
+ case 0x10:
+ switch(sub) {
+ case 0x00: return "network-encryption";
+ case 0x01: return "entertainment-encryption";
+ default: return "encryption";
+ }
+	case 0x11:
+ switch(sub) {
+ case 0x00: return "dpio";
+ case 0x01: return "counter";
+ case 0x10: return "measurement";
+ case 0x20: return "management-card";
+ default: return "data-processing";
+ }
+ }
+ return "device";
+}
+
+void pci_std_swizzle_irq_map(struct dt_node *np,
+ struct pci_device *pd,
+ struct pci_lsi_state *lstate,
+ uint8_t swizzle)
+{
+ uint32_t *map, *p;
+ int dev, irq;
+ size_t map_size;
+
+ /* Size in bytes of a target interrupt */
+ size_t isize = lstate->int_size * sizeof(uint32_t);
+
+ /* Calculate the size of a map entry:
+ *
+ * 3 cells : PCI Address
+ * 1 cell : PCI IRQ
+ * 1 cell : PIC phandle
+ * n cells : PIC irq (n = lstate->int_size)
+ *
+ * Assumption: PIC address is 0-size
+ */
+ int esize = 3 + 1 + 1 + lstate->int_size;
+
+ /* Number of map "device" entries
+ *
+ * A PCI Express root or downstream port needs only one
+ * entry for device 0. Anything else will get a full map
+ * for all possible 32 child device numbers
+ *
+ * If we have been passed a host bridge (pd == NULL) we also
+ * do a simple per-pin map
+ */
+ int edevcount;
+
+ if (!pd || (pd->dev_type == PCIE_TYPE_ROOT_PORT ||
+ pd->dev_type == PCIE_TYPE_SWITCH_DNPORT)) {
+ edevcount = 1;
+ dt_add_property_cells(np, "interrupt-map-mask", 0, 0, 0, 7);
+ } else {
+ edevcount = 32;
+ dt_add_property_cells(np, "interrupt-map-mask",
+ 0xf800, 0, 0, 7);
+ }
+ map_size = esize * edevcount * 4 * sizeof(uint32_t);
+ map = p = zalloc(map_size);
+
+ for (dev = 0; dev < edevcount; dev++) {
+ for (irq = 0; irq < 4; irq++) {
+ /* Calculate pin */
+ uint32_t new_irq = (irq + dev + swizzle) % 4;
+
+ /* PCI address portion */
+ *(p++) = dev << (8 + 3);
+ *(p++) = 0;
+ *(p++) = 0;
+
+ /* PCI interrupt portion */
+ *(p++) = irq + 1;
+
+ /* Parent phandle */
+ *(p++) = lstate->int_parent[new_irq];
+
+ /* Parent desc */
+ memcpy(p, lstate->int_val[new_irq], isize);
+ p += lstate->int_size;
+ }
+ }
+
+ dt_add_property(np, "interrupt-map", map, map_size);
+ free(map);
+}
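+
+/*
+ * Worked example of the swizzle above (values are illustrative): with
+ * swizzle = 0, INTA (irq 0) of device 2 is routed to parent input
+ * (0 + 2 + 0) % 4 = 2, matching the standard PCI-to-PCI bridge INTx
+ * rotation; a non-zero swizzle just shifts that rotation.
+ */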
+
+static void pci_add_slot_properties(struct phb *phb, struct pci_slot_info *info,
+ struct dt_node *np)
+{
+ char loc_code[LOC_CODE_SIZE];
+ size_t base_loc_code_len, slot_label_len;
+
+ if (phb->base_loc_code) {
+ base_loc_code_len = strlen(phb->base_loc_code);
+ slot_label_len = strlen(info->label);
+ if ((base_loc_code_len + slot_label_len +1) < LOC_CODE_SIZE) {
+ strcpy(loc_code, phb->base_loc_code);
+ strcat(loc_code, "-");
+ strcat(loc_code, info->label);
+ dt_add_property(np, "ibm,slot-location-code",
+ loc_code, strlen(loc_code) + 1);
+ } else
+ prerror("PCI: Loc Code too long - %zu + %zu + 1\n",
+ base_loc_code_len, slot_label_len);
+ } else
+ DBG("PCI: Base Loc code not found...\n");
+
+ /* Add other slot information */
+ dt_add_property_cells(np, "ibm,slot-pluggable", info->pluggable);
+ dt_add_property_cells(np, "ibm,slot-power-ctl", info->power_ctl);
+ dt_add_property_cells(np, "ibm,slot-wired-lanes", info->wired_lanes);
+ /*dt_add_property(np, "ibm,slot-bus-clock", &pd->slot_info->bus_clock, sizeof(uint8_t));*/
+ dt_add_property_cells(np, "ibm,slot-connector-type", info->connector_type);
+ dt_add_property_cells(np, "ibm,slot-card-desc", info->card_desc);
+ dt_add_property_cells(np, "ibm,slot-card-mech", info->card_mech);
+ dt_add_property_cells(np, "ibm,slot-pwr-led-ctl", info->pwr_led_ctl);
+ dt_add_property_cells(np, "ibm,slot-attn-led-ctl", info->attn_led_ctl);
+ dt_add_property_string(np, "ibm,slot-label", info->label);
+}
+
+static void pci_add_loc_code(struct dt_node *np)
+{
+ struct dt_node *p = np->parent;
+ const char *blcode = NULL;
+
+ /* Look for a parent with a slot-location-code */
+ while (p && !blcode) {
+ blcode = dt_prop_get_def(p, "ibm,slot-location-code", NULL);
+ p = p->parent;
+ }
+ if (!blcode)
+ return;
+ dt_add_property_string(np, "ibm,loc-code", blcode);
+}
+
+static void pci_print_summary_line(struct phb *phb, struct pci_device *pd,
+ struct dt_node *np, u32 rev_class,
+ const char *cname)
+{
+ const char *label, *dtype, *s;
+ u32 vdid;
+#define MAX_SLOTSTR 32
+ char slotstr[MAX_SLOTSTR + 1] = { 0, };
+
+ pci_cfg_read32(phb, pd->bdfn, 0, &vdid);
+
+ /* If it's a slot, it has a slot-label */
+ label = dt_prop_get_def(np, "ibm,slot-label", NULL);
+ if (label) {
+ u32 lanes = dt_prop_get_u32_def(np, "ibm,slot-wired-lanes", 0);
+ static const char *lanestrs[] = {
+ "", " x1", " x2", " x4", " x8", "x16", "x32", "32b", "64b"
+ };
+ const char *lstr = lanes > PCI_SLOT_WIRED_LANES_PCIX_64 ? "" : lanestrs[lanes];
+ snprintf(slotstr, MAX_SLOTSTR, "SLOT=%3s %s", label, lstr);
+ /* XXX Add more slot info */
+ } else {
+ /*
+		 * No label: ignore downstream switch legs and the root
+		 * complex; those would essentially be non-populated.
+ */
+ if (pd->dev_type != PCIE_TYPE_ROOT_PORT &&
+ pd->dev_type != PCIE_TYPE_SWITCH_DNPORT) {
+ /* It's a mere device, get loc code */
+ s = dt_prop_get_def(np, "ibm,loc-code", NULL);
+ if (s)
+ snprintf(slotstr, MAX_SLOTSTR, "LOC_CODE=%s", s);
+ }
+ }
+
+ if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) {
+ static const char *pcie_types[] = {
+ "EP ", "LGCY", "????", "????", "ROOT", "SWUP", "SWDN",
+ "ETOX", "XTOE", "RINT", "EVTC" };
+ if (pd->dev_type >= ARRAY_SIZE(pcie_types))
+ dtype = "????";
+ else
+ dtype = pcie_types[pd->dev_type];
+ } else
+ dtype = pd->is_bridge ? "PCIB" : "PCID";
+
+ if (pd->is_bridge) {
+ uint8_t sec_bus, sub_bus;
+ pci_cfg_read8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, &sec_bus);
+ pci_cfg_read8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, &sub_bus);
+ printf(" %04x:%02x:%02x.%x [%s] %04x %04x R:%02x C:%06x B:%02x..%02x %s\n",
+ phb->opal_id, pd->bdfn >> 8, (pd->bdfn >> 3) & 0x1f,
+ pd->bdfn & 0x7, dtype, vdid & 0xffff, vdid >> 16,
+ rev_class & 0xff, rev_class >> 8, sec_bus, sub_bus, slotstr);
+ } else
+ printf(" %04x:%02x:%02x.%x [%s] %04x %04x R:%02x C:%06x (%14s) %s\n",
+ phb->opal_id, pd->bdfn >> 8, (pd->bdfn >> 3) & 0x1f,
+ pd->bdfn & 0x7, dtype, vdid & 0xffff, vdid >> 16,
+ rev_class & 0xff, rev_class >> 8, cname, slotstr);
+}
+
+
+static void pci_add_one_node(struct phb *phb, struct pci_device *pd,
+ struct dt_node *parent_node,
+ struct pci_lsi_state *lstate, uint8_t swizzle)
+{
+ struct pci_device *child;
+ struct dt_node *np;
+ const char *cname;
+#define MAX_NAME 256
+ char name[MAX_NAME];
+ char compat[MAX_NAME];
+ uint32_t rev_class, vdid;
+ uint32_t reg[5];
+ uint8_t intpin;
+
+ pci_cfg_read32(phb, pd->bdfn, 0, &vdid);
+ pci_cfg_read32(phb, pd->bdfn, PCI_CFG_REV_ID, &rev_class);
+ pci_cfg_read8(phb, pd->bdfn, PCI_CFG_INT_PIN, &intpin);
+
+ /*
+ * Quirk for IBM bridge bogus class on PCIe root complex.
+ * Without it, the PCI DN won't be created for its downstream
+ * devices in Linux.
+ */
+ if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) &&
+ parent_node == phb->dt_node)
+ rev_class = (rev_class & 0xff) | 0x6040000;
+ cname = pci_class_name(rev_class >> 8);
+
+ if (pd->bdfn & 0x7)
+ snprintf(name, MAX_NAME - 1, "%s@%x,%x",
+ cname, (pd->bdfn >> 3) & 0x1f, pd->bdfn & 0x7);
+ else
+ snprintf(name, MAX_NAME - 1, "%s@%x",
+ cname, (pd->bdfn >> 3) & 0x1f);
+ np = dt_new(parent_node, name);
+
+ /* XXX FIXME: make proper "compatible" properties */
+ if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) {
+ snprintf(compat, MAX_NAME, "pciex%x,%x",
+ vdid & 0xffff, vdid >> 16);
+ dt_add_property_cells(np, "ibm,pci-config-space-type", 1);
+ } else {
+ snprintf(compat, MAX_NAME, "pci%x,%x",
+ vdid & 0xffff, vdid >> 16);
+ dt_add_property_cells(np, "ibm,pci-config-space-type", 0);
+ }
+ dt_add_property_cells(np, "class-code", rev_class >> 8);
+ dt_add_property_cells(np, "revision-id", rev_class & 0xff);
+ dt_add_property_cells(np, "vendor-id", vdid & 0xffff);
+ dt_add_property_cells(np, "device-id", vdid >> 16);
+ if (intpin)
+ dt_add_property_cells(np, "interrupts", intpin);
+
+ /* XXX FIXME: Add a few missing ones such as
+ *
+ * - devsel-speed (!express)
+ * - max-latency
+ * - min-grant
+ * - subsystem-id
+ * - subsystem-vendor-id
+ * - ...
+ */
+
+ /* Add slot properties if needed */
+ if (pd->slot_info)
+ pci_add_slot_properties(phb, pd->slot_info, np);
+
+ /* Make up location code */
+ pci_add_loc_code(np);
+
+ /* XXX FIXME: We don't look for BARs, we only put the config space
+ * entry in the "reg" property. That's enough for Linux and we might
+ * even want to make this legit in future ePAPR
+ */
+ reg[0] = pd->bdfn << 8;
+ reg[1] = reg[2] = reg[3] = reg[4] = 0;
+ dt_add_property(np, "reg", reg, sizeof(reg));
+
+ /* Print summary info about the device */
+ pci_print_summary_line(phb, pd, np, rev_class, cname);
+
+ if (!pd->is_bridge)
+ return;
+
+ dt_add_property_cells(np, "#address-cells", 3);
+ dt_add_property_cells(np, "#size-cells", 2);
+ dt_add_property_cells(np, "#interrupt-cells", 1);
+
+ /* We want "device_type" for bridges */
+ if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false))
+ dt_add_property_string(np, "device_type", "pciex");
+ else
+ dt_add_property_string(np, "device_type", "pci");
+
+ /* Update the current interrupt swizzling level based on our own
+ * device number
+ */
+ swizzle = (swizzle + ((pd->bdfn >> 3) & 0x1f)) & 3;
+
+ /* We generate a standard-swizzling interrupt map. This is pretty
+ * big, we *could* try to be smarter for things that aren't hotplug
+ * slots at least and only populate those entries for which there's
+	 * an actual child (especially on PCI Express), but for now that
+ * will do
+ */
+ pci_std_swizzle_irq_map(np, pd, lstate, swizzle);
+
+	/* We do an empty ranges property for now, we haven't set up any
+	 * bridge windows; the kernel will deal with that
+ *
+ * XXX The kernel should probably fix that up
+ */
+ dt_add_property(np, "ranges", NULL, 0);
+
+ list_for_each(&pd->children, child, link)
+ pci_add_one_node(phb, child, np, lstate, swizzle);
+}
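For reference, the node name and "reg" cells generated above all come from the 16-bit bdfn. A small self-contained sketch (not part of the patch) decoding the same bit fields used by the snprintf and reg[0] lines:

#include <stdio.h>
#include <stdint.h>

static void decode_bdfn(uint16_t bdfn)
{
	unsigned int bus = bdfn >> 8;		/* pd->bdfn >> 8 */
	unsigned int dev = (bdfn >> 3) & 0x1f;	/* (pd->bdfn >> 3) & 0x1f */
	unsigned int fn  = bdfn & 0x7;		/* pd->bdfn & 0x7 */

	printf("bdfn 0x%04x -> %02x:%02x.%x, reg[0] = 0x%08x\n",
	       bdfn, bus, dev, fn, (unsigned int)bdfn << 8);
}

int main(void)
{
	decode_bdfn(0x0000);	/* 00:00.0 -> node "xxx@0"   */
	decode_bdfn(0x0159);	/* 01:0b.1 -> node "xxx@b,1" */
	return 0;
}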
+
+static void pci_add_nodes(struct phb *phb)
+{
+ struct pci_lsi_state *lstate = &phb->lstate;
+ struct pci_device *pd;
+
+ /* If the PHB has its own slot info, add them */
+ if (phb->slot_info)
+		pci_add_slot_properties(phb, phb->slot_info, phb->dt_node);
+
+ /* Add all child devices */
+ list_for_each(&phb->devices, pd, link)
+ pci_add_one_node(phb, pd, phb->dt_node, lstate, 0);
+}
+
+static void __pci_reset(struct list_head *list)
+{
+ struct pci_device *pd;
+
+ while ((pd = list_pop(list, struct pci_device, link)) != NULL) {
+ __pci_reset(&pd->children);
+ free(pd);
+ }
+}
+
+void pci_reset(void)
+{
+ unsigned int i;
+
+ printf("PCI: Clearing all devices...\n");
+
+ lock(&pci_lock);
+
+ /* XXX Do those in parallel (at least the power up
+ * state machine could be done in parallel)
+ */
+ for (i = 0; i < PCI_MAX_PHBs; i++) {
+ if (!phbs[i])
+ continue;
+ __pci_reset(&phbs[i]->devices);
+ }
+ unlock(&pci_lock);
+}
+
+void pci_init_slots(void)
+{
+ unsigned int i;
+
+ printf("PCI: Probing PHB slots...\n");
+
+ lock(&pci_lock);
+
+ /* XXX Do those in parallel (at least the power up
+ * state machine could be done in parallel)
+ */
+ for (i = 0; i < PCI_MAX_PHBs; i++) {
+ if (!phbs[i])
+ continue;
+ pci_init_slot(phbs[i]);
+ }
+
+ if (platform.pci_probe_complete)
+ platform.pci_probe_complete();
+
+ printf("PCI: Summary\n");
+ for (i = 0; i < PCI_MAX_PHBs; i++) {
+ if (!phbs[i])
+ continue;
+ pci_add_nodes(phbs[i]);
+ }
+ unlock(&pci_lock);
+}
+
+static struct pci_device *__pci_walk_dev(struct phb *phb,
+ struct list_head *l,
+ int (*cb)(struct phb *,
+ struct pci_device *,
+ void *),
+ void *userdata)
+{
+ struct pci_device *pd, *child;
+
+ if (list_empty(l))
+ return NULL;
+
+ list_for_each(l, pd, link) {
+ if (cb && cb(phb, pd, userdata))
+ return pd;
+
+ child = __pci_walk_dev(phb, &pd->children, cb, userdata);
+ if (child)
+ return child;
+ }
+
+ return NULL;
+}
+
+struct pci_device *pci_walk_dev(struct phb *phb,
+ int (*cb)(struct phb *,
+ struct pci_device *,
+ void *),
+ void *userdata)
+{
+ return __pci_walk_dev(phb, &phb->devices, cb, userdata);
+}
+
+static int __pci_find_dev(struct phb *phb,
+ struct pci_device *pd, void *userdata)
+{
+ uint16_t bdfn = *((uint16_t *)userdata);
+
+ if (!phb || !pd)
+ return 0;
+
+ if (pd->bdfn == bdfn)
+ return 1;
+
+ return 0;
+}
+
+struct pci_device *pci_find_dev(struct phb *phb, uint16_t bdfn)
+{
+ return pci_walk_dev(phb, __pci_find_dev, &bdfn);
+}
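As a usage note, pci_walk_dev() takes a per-device callback and stops as soon as the callback returns non-zero, exactly as __pci_find_dev() relies on above. A hypothetical callback that counts bridges under a PHB, sketched against the signatures in this file and meant to live alongside skiboot code (count_bridge_cb and count_bridges are illustrative names only):

static int count_bridge_cb(struct phb *phb, struct pci_device *pd,
			   void *userdata)
{
	unsigned int *count = userdata;

	(void)phb;
	if (pd->is_bridge)
		(*count)++;
	return 0;	/* 0 means "keep walking" */
}

static unsigned int count_bridges(struct phb *phb)
{
	unsigned int count = 0;

	pci_walk_dev(phb, count_bridge_cb, &count);
	return count;
}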
diff --git a/core/platform.c b/core/platform.c
new file mode 100644
index 0000000..e54b334
--- /dev/null
+++ b/core/platform.c
@@ -0,0 +1,78 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include <skiboot.h>
+#include <opal.h>
+#include <console.h>
+
+/*
+ * Various wrappers for platform functions
+ */
+static int64_t opal_cec_power_down(uint64_t request)
+{
+ printf("OPAL: Shutdown request type 0x%llx...\n", request);
+
+ if (platform.cec_power_down)
+ return platform.cec_power_down(request);
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_CEC_POWER_DOWN, opal_cec_power_down, 1);
+
+static int64_t opal_cec_reboot(void)
+{
+ printf("OPAL: Reboot request...\n");
+
+#ifdef ENABLE_FAST_RESET
+ /* Try a fast reset first */
+ fast_reset();
+#endif
+ if (platform.cec_reboot)
+ return platform.cec_reboot();
+
+ return OPAL_SUCCESS;
+}
+opal_call(OPAL_CEC_REBOOT, opal_cec_reboot, 0);
+
+static void generic_platform_init(void)
+{
+	/* Do we want to unconditionally enable it? */
+ if (dummy_console_enabled())
+ dummy_console_add_nodes();
+}
+
+static struct platform generic_platform = {
+ .name = "generic",
+ .init = generic_platform_init,
+};
+
+void probe_platform(void)
+{
+ struct platform *platforms = &__platforms_start;
+ unsigned int i;
+
+ platform = generic_platform;
+
+ for (i = 0; &platforms[i] < &__platforms_end; i++) {
+ if (platforms[i].probe && platforms[i].probe()) {
+ platform = platforms[i];
+ break;
+ }
+ }
+
+ printf("PLAT: Detected %s platform\n", platform.name);
+}
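For orientation, probe_platform() walks the array of struct platform entries the linker lays out between __platforms_start and __platforms_end and keeps the first one whose probe() returns true. A hypothetical board entry is sketched below; the field names match those used in this file, the probe()/cec_reboot() signatures are assumed from their call sites, and the linker-section registration itself is outside the sketch:

/* Illustrative only -- "myboard" is not a real platform. */
static bool myboard_probe(void)
{
	/* A real probe would key off the device tree, e.g. a
	 * compatible string; this sketch never matches.
	 */
	return false;
}

static int64_t myboard_cec_reboot(void)
{
	return OPAL_SUCCESS;
}

static struct platform myboard_platform = {
	.name		= "myboard",
	.probe		= myboard_probe,
	.cec_reboot	= myboard_cec_reboot,
};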
diff --git a/core/relocate.c b/core/relocate.c
new file mode 100644
index 0000000..f6bda37
--- /dev/null
+++ b/core/relocate.c
@@ -0,0 +1,65 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdbool.h>
+#include <elf.h>
+
+/* WARNING: This code is used to self-relocate, it cannot have any
+ * global reference nor TOC reference. It's also called before BSS
+ * is cleared.
+ */
+
+/* Called from head.S, thus no header. */
+int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela);
+
+/* Note: This code is simplified according to the assumptions
+ * that our link address is 0 and we are running at the
+ * target address already.
+ */
+int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela)
+{
+ uint64_t dt_rela = 0;
+ uint64_t dt_relacount = 0;
+ unsigned int i;
+
+ /* Look for relocation table */
+ for (; dyn->d_tag != DT_NULL; dyn++) {
+ if (dyn->d_tag == DT_RELA)
+ dt_rela = dyn->d_val;
+ else if (dyn->d_tag == DT_RELACOUNT)
+ dt_relacount = dyn->d_val;
+ }
+
+ /* If we miss either rela or relacount, bail */
+ if (!dt_rela || !dt_relacount)
+ return false;
+
+ /* Check if the offset is consistent */
+ if ((offset + dt_rela) != (uint64_t)rela)
+ return false;
+
+ /* Perform relocations */
+ for (i = 0; i < dt_relacount; i++, rela++) {
+ uint64_t *t;
+
+ if (ELF64_R_TYPE(rela->r_info) != R_PPC64_RELATIVE)
+ return false;
+ t = (uint64_t *)(rela->r_offset + offset);
+ *t = rela->r_addend + offset;
+ }
+
+ return true;
+}
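Each R_PPC64_RELATIVE entry handled above simply stores "load offset + addend" at "load offset + r_offset". A self-contained sketch (not part of the patch; fake_rela is an illustrative stand-in for struct elf64_rela) applying one such entry to a local buffer:

#include <assert.h>
#include <stdint.h>

struct fake_rela {
	uint64_t r_offset;	/* patch location, relative to link address 0 */
	uint64_t r_info;	/* would encode R_PPC64_RELATIVE */
	int64_t  r_addend;	/* link-time value of the pointer */
};

int main(void)
{
	uint64_t image[4] = { 0, 0, 0, 0 };
	uint64_t offset = (uint64_t)(uintptr_t)image;	/* pretend load address */
	struct fake_rela rela = {
		.r_offset = 2 * sizeof(uint64_t),
		.r_addend = 0x100,
	};

	/* Same arithmetic as the loop in relocate() */
	uint64_t *t = (uint64_t *)(uintptr_t)(rela.r_offset + offset);
	*t = rela.r_addend + offset;

	assert(image[2] == offset + 0x100);
	return 0;
}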
diff --git a/core/test/Makefile.check b/core/test/Makefile.check
new file mode 100644
index 0000000..37dac46
--- /dev/null
+++ b/core/test/Makefile.check
@@ -0,0 +1,29 @@
+# -*-Makefile-*-
+CORE_TEST := core/test/run-device core/test/run-mem_region core/test/run-malloc core/test/run-malloc-speed core/test/run-mem_region_init core/test/run-mem_region_release_unused core/test/run-mem_region_release_unused_noalloc core/test/run-trace core/test/run-msg
+
+check: $(CORE_TEST:%=%-check)
+
+$(CORE_TEST:%=%-check) : %-check: %
+ $(VALGRIND) $<
+
+core/test/stubs.o: core/test/stubs.c
+ $(HOSTCC) $(HOSTCFLAGS) -g -c -o $@ $<
+
+$(CORE_TEST) : core/test/stubs.o
+
+$(CORE_TEST) : % : %.c
+ $(HOSTCC) $(HOSTCFLAGS) -O0 -g -I include -I . -I libfdt -o $@ $< core/test/stubs.o
+
+$(CORE_TEST): % : %.d
+
+core/test/stubs.o: core/test/stubs.d
+
+core/test/%.d: core/test/%.c
+ $(HOSTCC) $(HOSTCFLAGS) -I include -I . -I libfdt -M $< > $@
+
+-include core/test/*.d
+
+clean: core-test-clean
+
+core-test-clean:
+ $(RM) -f core/test/*.[od] $(CORE_TEST)
diff --git a/core/test/run-device.c b/core/test/run-device.c
new file mode 100644
index 0000000..fa9e951
--- /dev/null
+++ b/core/test/run-device.c
@@ -0,0 +1,118 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+
+/* Override this for testing. */
+#define is_rodata(p) fake_is_rodata(p)
+
+char __rodata_start[16];
+#define __rodata_end (__rodata_start + sizeof(__rodata_start))
+
+static inline bool fake_is_rodata(const void *p)
+{
+ return ((char *)p >= __rodata_start && (char *)p < __rodata_end);
+}
+
+#define zalloc(bytes) calloc((bytes), 1)
+
+#include "../device.c"
+#include "../../ccan/list/list.c" /* For list_check */
+#include <assert.h>
+
+int main(void)
+{
+ struct dt_node *root, *c1, *c2, *gc1, *gc2, *gc3, *ggc1, *i;
+ const struct dt_property *p;
+ struct dt_property *p2;
+ unsigned int n;
+
+ root = dt_new_root("root");
+ assert(!list_top(&root->properties, struct dt_property, list));
+ c1 = dt_new(root, "c1");
+ assert(!list_top(&c1->properties, struct dt_property, list));
+ c2 = dt_new(root, "c2");
+ assert(!list_top(&c2->properties, struct dt_property, list));
+ gc1 = dt_new(c1, "gc1");
+ assert(!list_top(&gc1->properties, struct dt_property, list));
+ gc2 = dt_new(c1, "gc2");
+ assert(!list_top(&gc2->properties, struct dt_property, list));
+ gc3 = dt_new(c1, "gc3");
+ assert(!list_top(&gc3->properties, struct dt_property, list));
+ ggc1 = dt_new(gc1, "ggc1");
+ assert(!list_top(&ggc1->properties, struct dt_property, list));
+
+ for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) {
+ assert(!list_top(&i->properties, struct dt_property, list));
+ dt_add_property_cells(i, "visited", 1);
+ }
+ assert(n == 6);
+
+ for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) {
+ p = list_top(&i->properties, struct dt_property, list);
+ assert(strcmp(p->name, "visited") == 0);
+ assert(p->len == sizeof(u32));
+ assert(fdt32_to_cpu(*(u32 *)p->prop) == 1);
+ }
+ assert(n == 6);
+
+ dt_add_property_cells(c1, "some-property", 1, 2, 3);
+ p = dt_find_property(c1, "some-property");
+ assert(p);
+ assert(strcmp(p->name, "some-property") == 0);
+ assert(p->len == sizeof(u32) * 3);
+ assert(fdt32_to_cpu(*(u32 *)p->prop) == 1);
+ assert(fdt32_to_cpu(*((u32 *)p->prop + 1)) == 2);
+ assert(fdt32_to_cpu(*((u32 *)p->prop + 2)) == 3);
+
+ /* Test freeing a single node */
+ assert(!list_empty(&gc1->children));
+ dt_free(ggc1);
+ assert(list_empty(&gc1->children));
+
+ /* Test rodata logic. */
+ assert(!is_rodata("hello"));
+ assert(is_rodata(__rodata_start));
+ strcpy(__rodata_start, "name");
+ ggc1 = dt_new(root, __rodata_start);
+ assert(ggc1->name == __rodata_start);
+
+ /* Test string node. */
+ dt_add_property_string(ggc1, "somestring", "someval");
+ assert(dt_has_node_property(ggc1, "somestring", "someval"));
+ assert(!dt_has_node_property(ggc1, "somestrin", "someval"));
+ assert(!dt_has_node_property(ggc1, "somestring", "someva"));
+ assert(!dt_has_node_property(ggc1, "somestring", "somevale"));
+
+ /* Test resizing property. */
+ p = p2 = __dt_find_property(c1, "some-property");
+ assert(p);
+ n = p2->len;
+ while (p2 == p) {
+ n *= 2;
+ dt_resize_property(&p2, n);
+ }
+
+ assert(dt_find_property(c1, "some-property") == p2);
+ list_check(&c1->properties, "properties after resizing");
+
+ dt_del_property(c1, p2);
+ list_check(&c1->properties, "properties after delete");
+
+ /* No leaks for valgrind! */
+ dt_free(root);
+ return 0;
+}
diff --git a/core/test/run-malloc-speed.c b/core/test/run-malloc-speed.c
new file mode 100644
index 0000000..edc7589
--- /dev/null
+++ b/core/test/run-malloc-speed.c
@@ -0,0 +1,94 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+/* Don't include this, it's PPC-specific */
+#define __CPU_H
+static unsigned int cpu_max_pir = 1;
+struct cpu_thread {
+ unsigned int chip_id;
+};
+
+#include <stdlib.h>
+
+/* Use these before we undefine them below. */
+static inline void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static inline void real_free(void *p)
+{
+ return free(p);
+}
+
+#include <skiboot.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../malloc.c"
+#include "../mem_region.c"
+#include "../device.c"
+
+#undef malloc
+#undef free
+#undef realloc
+
+#include <assert.h>
+#include <stdio.h>
+
+char __rodata_start[1], __rodata_end[1];
+struct dt_node *dt_root;
+
+void lock(struct lock *l)
+{
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+#define TEST_HEAP_ORDER 27
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+#define NUM_ALLOCS 4096
+
+int main(void)
+{
+ uint64_t i, len;
+ void *p[NUM_ALLOCS];
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)real_malloc(skiboot_heap.len);
+
+ len = skiboot_heap.len / NUM_ALLOCS - sizeof(struct alloc_hdr);
+ for (i = 0; i < NUM_ALLOCS; i++) {
+ p[i] = __malloc(len, __location__);
+ assert(p[i] > region_start(&skiboot_heap));
+ assert(p[i] + len <= region_start(&skiboot_heap)
+ + skiboot_heap.len);
+ }
+ assert(mem_check(&skiboot_heap));
+ assert(mem_region_lock.lock_val == 0);
+ free(region_start(&skiboot_heap));
+ return 0;
+}
diff --git a/core/test/run-malloc.c b/core/test/run-malloc.c
new file mode 100644
index 0000000..226ce75
--- /dev/null
+++ b/core/test/run-malloc.c
@@ -0,0 +1,144 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+/* Don't include this, it's PPC-specific */
+#define __CPU_H
+static unsigned int cpu_max_pir = 1;
+struct cpu_thread {
+ unsigned int chip_id;
+};
+
+#include <skiboot.h>
+
+#define is_rodata(p) true
+
+#include "../mem_region.c"
+#include "../malloc.c"
+#include "../device.c"
+
+#include "mem_region-malloc.h"
+
+#define TEST_HEAP_ORDER 12
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+struct dt_node *dt_root;
+
+void lock(struct lock *l)
+{
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+static bool heap_empty(void)
+{
+ const struct alloc_hdr *h = region_start(&skiboot_heap);
+ return h->num_longs == skiboot_heap.len / sizeof(long);
+}
+
+int main(void)
+{
+ char test_heap[TEST_HEAP_SIZE], *p, *p2, *p3, *p4;
+ size_t i;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)test_heap;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+
+ /* Allocations of various sizes. */
+ for (i = 0; i < TEST_HEAP_ORDER; i++) {
+ p = malloc(1ULL << i);
+ assert(p);
+ assert(p > (char *)test_heap);
+ assert(p + (1ULL << i) <= (char *)test_heap + TEST_HEAP_SIZE);
+ assert(!mem_region_lock.lock_val);
+ free(p);
+ assert(!mem_region_lock.lock_val);
+ assert(heap_empty());
+ }
+
+ /* Realloc as malloc. */
+ mem_region_lock.lock_val = 0;
+ p = realloc(NULL, 100);
+ assert(p);
+ assert(!mem_region_lock.lock_val);
+
+ /* Realloc as free. */
+ p = realloc(p, 0);
+ assert(!p);
+ assert(!mem_region_lock.lock_val);
+ assert(heap_empty());
+
+ /* Realloc longer. */
+ p = realloc(NULL, 100);
+ assert(p);
+ assert(!mem_region_lock.lock_val);
+ p2 = realloc(p, 200);
+ assert(p2 == p);
+ assert(!mem_region_lock.lock_val);
+ free(p);
+ assert(!mem_region_lock.lock_val);
+ assert(heap_empty());
+
+ /* Realloc shorter. */
+ mem_region_lock.lock_val = 0;
+ p = realloc(NULL, 100);
+ assert(!mem_region_lock.lock_val);
+ assert(p);
+ p2 = realloc(p, 1);
+ assert(!mem_region_lock.lock_val);
+ assert(p2 == p);
+ free(p);
+ assert(!mem_region_lock.lock_val);
+ assert(heap_empty());
+
+ /* Realloc with move. */
+ p2 = malloc(TEST_HEAP_SIZE - 64 - sizeof(struct alloc_hdr)*2);
+ assert(p2);
+ p = malloc(64);
+ assert(p);
+ free(p2);
+
+ p2 = realloc(p, 128);
+ assert(p2 != p);
+ free(p2);
+ assert(heap_empty());
+ assert(!mem_region_lock.lock_val);
+
+ /* Reproduce bug BZ109128/SW257364 */
+ p = malloc(100);
+ p2 = malloc(100);
+ p3 = malloc(100);
+ p4 = malloc(100);
+ free(p2);
+	realloc(p, 216);
+ free(p3);
+ free(p);
+ free(p4);
+ assert(heap_empty());
+ assert(!mem_region_lock.lock_val);
+
+ return 0;
+}
diff --git a/core/test/run-mem_region.c b/core/test/run-mem_region.c
new file mode 100644
index 0000000..f0ad2c2
--- /dev/null
+++ b/core/test/run-mem_region.c
@@ -0,0 +1,250 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+/* Don't include this, it's PPC-specific */
+#define __CPU_H
+static unsigned int cpu_max_pir = 1;
+struct cpu_thread {
+ unsigned int chip_id;
+};
+
+#include <stdlib.h>
+#include <string.h>
+
+/* Use these before we override definitions below. */
+static void *__malloc(size_t size, const char *location __attribute__((unused)))
+{
+ return malloc(size);
+}
+
+static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused)))
+{
+ return realloc(ptr, size);
+}
+
+static inline void __free(void *p, const char *location __attribute__((unused)))
+{
+ return free(p);
+}
+
+static void *__zalloc(size_t size, const char *location __attribute__((unused)))
+{
+ void *ptr = malloc(size);
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+#include <skiboot.h>
+
+#define is_rodata(p) true
+
+#include "../mem_region.c"
+#include "../device.c"
+
+#include <assert.h>
+#include <stdio.h>
+
+struct dt_node *dt_root;
+
+void lock(struct lock *l)
+{
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ l->lock_val--;
+}
+
+#define TEST_HEAP_ORDER 12
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static bool heap_empty(void)
+{
+ const struct alloc_hdr *h = region_start(&skiboot_heap);
+ return h->num_longs == skiboot_heap.len / sizeof(long);
+}
+
+int main(void)
+{
+ char *test_heap;
+ void *p, *ptrs[100];
+ size_t i;
+ struct mem_region *r;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ test_heap = __malloc(TEST_HEAP_SIZE, __location__);
+ skiboot_heap.start = (unsigned long)test_heap;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+
+ /* Allocations of various sizes. */
+ for (i = 0; i < TEST_HEAP_ORDER; i++) {
+ p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here");
+ assert(p);
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "here"));
+ assert(p > (void *)test_heap);
+ assert(p + (1ULL << i) <= (void *)test_heap + TEST_HEAP_SIZE);
+ assert(mem_size(&skiboot_heap, p) >= 1ULL << i);
+ mem_free(&skiboot_heap, p, "freed");
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "freed"));
+ }
+ p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here");
+ assert(!p);
+ mem_free(&skiboot_heap, p, "freed");
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+
+ /* Allocations of various alignments: use small alloc first. */
+ ptrs[0] = mem_alloc(&skiboot_heap, 1, 1, "small");
+ for (i = 0; ; i++) {
+ p = mem_alloc(&skiboot_heap, 1, 1ULL << i, "here");
+ assert(mem_check(&skiboot_heap));
+ /* We will eventually fail... */
+ if (!p) {
+ assert(i >= TEST_HEAP_ORDER);
+ break;
+ }
+ assert(p);
+ assert((long)p % (1ULL << i) == 0);
+ assert(p > (void *)test_heap);
+ assert(p + 1 <= (void *)test_heap + TEST_HEAP_SIZE);
+ mem_free(&skiboot_heap, p, "freed");
+ assert(mem_check(&skiboot_heap));
+ }
+ mem_free(&skiboot_heap, ptrs[0], "small freed");
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+
+ /* Many little allocations, freed in reverse order. */
+ for (i = 0; i < 100; i++) {
+ ptrs[i] = mem_alloc(&skiboot_heap, sizeof(long), 1, "here");
+ assert(ptrs[i]);
+ assert(ptrs[i] > (void *)test_heap);
+ assert(ptrs[i] + sizeof(long)
+ <= (void *)test_heap + TEST_HEAP_SIZE);
+ assert(mem_check(&skiboot_heap));
+ }
+ for (i = 0; i < 100; i++)
+ mem_free(&skiboot_heap, ptrs[100 - 1 - i], "freed");
+
+ assert(heap_empty());
+ assert(mem_check(&skiboot_heap));
+
+ /* Check the prev_free gets updated properly. */
+ ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0]");
+ ptrs[1] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[1]");
+ assert(ptrs[1] > ptrs[0]);
+ mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free");
+ assert(mem_check(&skiboot_heap));
+ ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0] again");
+ assert(mem_check(&skiboot_heap));
+ mem_free(&skiboot_heap, ptrs[1], "ptrs[1] free");
+ mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free");
+ assert(mem_check(&skiboot_heap));
+ assert(heap_empty());
+
+#if 0
+ printf("Heap map:\n");
+ for (i = 0; i < TEST_HEAP_SIZE / sizeof(long); i++) {
+ printf("%u", test_bit(skiboot_heap.bitmap, i));
+ if (i % 64 == 63)
+ printf("\n");
+ else if (i % 8 == 7)
+ printf(" ");
+ }
+#endif
+
+ /* Simple enlargement, then free */
+ p = mem_alloc(&skiboot_heap, 1, 1, "one byte");
+ assert(p);
+ assert(mem_resize(&skiboot_heap, p, 100, "hundred bytes"));
+ assert(mem_size(&skiboot_heap, p) >= 100);
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "hundred bytes"));
+ mem_free(&skiboot_heap, p, "freed");
+
+ /* Simple shrink, then free */
+ p = mem_alloc(&skiboot_heap, 100, 1, "100 bytes");
+ assert(p);
+ assert(mem_resize(&skiboot_heap, p, 1, "1 byte"));
+ assert(mem_size(&skiboot_heap, p) < 100);
+ assert(mem_check(&skiboot_heap));
+ assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "1 byte"));
+ mem_free(&skiboot_heap, p, "freed");
+
+ /* Lots of resizing (enlarge). */
+ p = mem_alloc(&skiboot_heap, 1, 1, "one byte");
+ assert(p);
+ for (i = 1; i <= TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i++) {
+ assert(mem_resize(&skiboot_heap, p, i, "enlarge"));
+ assert(mem_size(&skiboot_heap, p) >= i);
+ assert(mem_check(&skiboot_heap));
+ }
+
+ /* Can't make it larger though. */
+ assert(!mem_resize(&skiboot_heap, p, i, "enlarge"));
+
+ for (i = TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i > 0; i--) {
+ assert(mem_resize(&skiboot_heap, p, i, "shrink"));
+ assert(mem_check(&skiboot_heap));
+ }
+
+ mem_free(&skiboot_heap, p, "freed");
+ assert(mem_check(&skiboot_heap));
+
+ /* Test splitting of a region. */
+ r = new_region("base", (unsigned long)test_heap,
+ TEST_HEAP_SIZE, NULL, REGION_SKIBOOT_HEAP);
+ assert(add_region(r));
+ r = new_region("splitter", (unsigned long)test_heap + TEST_HEAP_SIZE/4,
+ TEST_HEAP_SIZE/2, NULL, REGION_RESERVED);
+ assert(add_region(r));
+ /* Now we should have *three* regions. */
+ i = 0;
+ list_for_each(&regions, r, list) {
+ if (region_start(r) == test_heap) {
+ assert(r->len == TEST_HEAP_SIZE/4);
+ assert(strcmp(r->name, "base") == 0);
+ assert(r->type == REGION_SKIBOOT_HEAP);
+ } else if (region_start(r) == test_heap + TEST_HEAP_SIZE / 4) {
+ assert(r->len == TEST_HEAP_SIZE/2);
+ assert(strcmp(r->name, "splitter") == 0);
+ assert(r->type == REGION_RESERVED);
+ assert(!r->free_list.n.next);
+ } else if (region_start(r) == test_heap + TEST_HEAP_SIZE/4*3) {
+ assert(r->len == TEST_HEAP_SIZE/4);
+ assert(strcmp(r->name, "base") == 0);
+ assert(r->type == REGION_SKIBOOT_HEAP);
+ } else
+ abort();
+ assert(mem_check(r));
+ i++;
+ }
+ assert(i == 3);
+ while ((r = list_pop(&regions, struct mem_region, list)) != NULL) {
+ list_del(&r->list);
+ mem_free(&skiboot_heap, r, __location__);
+ }
+ assert(mem_region_lock.lock_val == 0);
+ __free(test_heap, "");
+ return 0;
+}
diff --git a/core/test/run-mem_region_init.c b/core/test/run-mem_region_init.c
new file mode 100644
index 0000000..a24cc7b
--- /dev/null
+++ b/core/test/run-mem_region_init.c
@@ -0,0 +1,179 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+/* Don't include this, it's PPC-specific */
+#define __CPU_H
+static unsigned int cpu_max_pir = 1;
+struct cpu_thread {
+ unsigned int chip_id;
+};
+
+#include <stdlib.h>
+
+/* Use these before we undefine them below. */
+static inline void *real_malloc(size_t size)
+{
+ return malloc(size);
+}
+
+static inline void real_free(void *p)
+{
+ return free(p);
+}
+
+#include "../malloc.c"
+
+#include <skiboot.h>
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+
+static inline char *skiboot_strdup(const char *str)
+{
+ char *ret = __malloc(strlen(str) + 1, "");
+ return memcpy(ret, str, strlen(str) + 1);
+}
+#undef strdup
+#define strdup skiboot_strdup
+
+#include "../device.c"
+
+#include <assert.h>
+#include <stdio.h>
+
+void lock(struct lock *l)
+{
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+/* We actually need a lot of room for the bitmaps! */
+#define TEST_HEAP_ORDER 27
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char name[sizeof("memory@") + STR_MAX_CHARS(reg[0])];
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+ sprintf(name, "memory@%llx", (unsigned long long)start);
+
+ mem = dt_new(dt_root, name);
+ assert(mem);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+int main(void)
+{
+ uint64_t end;
+ int builtins;
+ struct mem_region *r;
+ char *heap = real_malloc(TEST_HEAP_SIZE);
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)heap;
+ skiboot_heap.len = TEST_HEAP_SIZE;
+ skiboot_os_reserve.len = (unsigned long)heap;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ /* Make sure we overlap the heap, at least. */
+ add_mem_node(0, 0x100000000ULL);
+ add_mem_node(0x100000000ULL, 0x100000000ULL);
+ end = 0x200000000ULL;
+
+ /* Now convert. */
+ mem_region_init();
+ assert(mem_check(&skiboot_heap));
+
+ builtins = 0;
+ list_for_each(&regions, r, list) {
+ /* Regions must not overlap. */
+ struct mem_region *r2, *pre = NULL, *post = NULL;
+ list_for_each(&regions, r2, list) {
+ if (r == r2)
+ continue;
+ assert(!overlaps(r, r2));
+ }
+
+ /* But should have exact neighbours. */
+ list_for_each(&regions, r2, list) {
+ if (r == r2)
+ continue;
+ if (r2->start == r->start + r->len)
+ post = r2;
+ if (r2->start + r2->len == r->start)
+ pre = r2;
+ }
+ assert(r->start == 0 || pre);
+ assert(r->start + r->len == end || post);
+
+ if (r == &skiboot_code_and_text ||
+ r == &skiboot_heap ||
+ r == &skiboot_after_heap ||
+ r == &skiboot_cpu_stacks ||
+ r == &skiboot_os_reserve)
+ builtins++;
+ else
+ assert(r->type == REGION_SKIBOOT_HEAP);
+ assert(mem_check(r));
+ }
+ assert(builtins == 5);
+
+ dt_free(dt_root);
+
+ while ((r = list_pop(&regions, struct mem_region, list)) != NULL) {
+ list_del(&r->list);
+ if (r != &skiboot_code_and_text &&
+ r != &skiboot_heap &&
+ r != &skiboot_after_heap &&
+ r != &skiboot_os_reserve &&
+ r != &skiboot_cpu_stacks) {
+ free(r);
+ }
+ assert(mem_check(&skiboot_heap));
+ }
+ assert(mem_region_lock.lock_val == 0);
+ real_free(heap);
+ return 0;
+}
diff --git a/core/test/run-mem_region_release_unused.c b/core/test/run-mem_region_release_unused.c
new file mode 100644
index 0000000..e73cf25
--- /dev/null
+++ b/core/test/run-mem_region_release_unused.c
@@ -0,0 +1,177 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+/* Don't include this, it's PPC-specific */
+#define __CPU_H
+static unsigned int cpu_max_pir = 1;
+struct cpu_thread {
+ unsigned int chip_id;
+};
+
+#include <stdlib.h>
+
+static void *__malloc(size_t size, const char *location __attribute__((unused)))
+{
+ return malloc(size);
+}
+
+static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused)))
+{
+ return realloc(ptr, size);
+}
+
+static void *__zalloc(size_t size, const char *location __attribute__((unused)))
+{
+ return calloc(size, 1);
+}
+
+static inline void __free(void *p, const char *location __attribute__((unused)))
+{
+ return free(p);
+}
+
+#include <skiboot.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+
+#include "../device.c"
+#include <assert.h>
+#include <stdio.h>
+
+void lock(struct lock *l)
+{
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ l->lock_val--;
+}
+
+#define TEST_HEAP_ORDER 12
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char name[sizeof("memory@") + STR_MAX_CHARS(reg[0])];
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+	sprintf(name, "memory@%llx", (unsigned long long)start);
+
+ mem = dt_new(dt_root, name);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+int main(void)
+{
+ uint64_t i;
+ struct mem_region *r, *other = NULL;
+ void *other_mem;
+ const char *last;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)malloc(TEST_HEAP_SIZE);
+ skiboot_heap.len = TEST_HEAP_SIZE;
+ skiboot_os_reserve.len = skiboot_heap.start;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ other_mem = malloc(1024*1024);
+ add_mem_node((unsigned long)other_mem, 1024*1024);
+
+ /* Now convert. */
+ mem_region_init();
+
+ /* Find our node to allocate from */
+ list_for_each(&regions, r, list) {
+ if (region_start(r) == other_mem)
+ other = r;
+ }
+ /* This could happen if skiboot addresses clashed with our alloc. */
+ assert(other);
+ assert(mem_check(other));
+
+ /* Allocate 1k from other region. */
+ mem_alloc(other, 1024, 1, "1k");
+ mem_region_release_unused();
+
+ assert(mem_check(&skiboot_heap));
+
+ /* Now we expect it to be split. */
+ i = 0;
+ list_for_each(&regions, r, list) {
+ assert(mem_check(r));
+ i++;
+ if (r == &skiboot_os_reserve)
+ continue;
+ if (r == &skiboot_code_and_text)
+ continue;
+ if (r == &skiboot_heap)
+ continue;
+ if (r == &skiboot_after_heap)
+ continue;
+ if (r == &skiboot_cpu_stacks)
+ continue;
+ if (r == other) {
+ assert(r->type == REGION_SKIBOOT_HEAP);
+ assert(r->len < 1024 * 1024);
+ } else {
+ assert(r->type == REGION_OS);
+ assert(r->start == other->start + other->len);
+ assert(r->start + r->len == other->start + 1024*1024);
+ }
+ }
+ assert(i == 7);
+
+ last = NULL;
+ list_for_each(&regions, r, list) {
+ if (last != r->name &&
+ strncmp(r->name, NODE_REGION_PREFIX,
+ strlen(NODE_REGION_PREFIX)) == 0) {
+ /* It's safe to cast away const as this is
+ * only going to happen in test code */
+ free((void*)r->name);
+ break;
+ }
+ last = r->name;
+ }
+
+ dt_free(dt_root);
+ free((void *)(long)skiboot_heap.start);
+ free(other_mem);
+ return 0;
+}
diff --git a/core/test/run-mem_region_release_unused_noalloc.c b/core/test/run-mem_region_release_unused_noalloc.c
new file mode 100644
index 0000000..818e272
--- /dev/null
+++ b/core/test/run-mem_region_release_unused_noalloc.c
@@ -0,0 +1,159 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+/* Don't include this, it's PPC-specific */
+#define __CPU_H
+static unsigned int cpu_max_pir = 1;
+struct cpu_thread {
+ unsigned int chip_id;
+};
+
+#include <stdlib.h>
+
+static void *__malloc(size_t size, const char *location __attribute__((unused)))
+{
+ return malloc(size);
+}
+
+static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused)))
+{
+ return realloc(ptr, size);
+}
+
+static void *__zalloc(size_t size, const char *location __attribute__((unused)))
+{
+ return calloc(size, 1);
+}
+
+static inline void __free(void *p, const char *location __attribute__((unused)))
+{
+ return free(p);
+}
+
+#include <skiboot.h>
+
+/* We need mem_region to accept __location__ */
+#define is_rodata(p) true
+#include "../mem_region.c"
+
+/* But we need device tree to make copies of names. */
+#undef is_rodata
+#define is_rodata(p) false
+
+#include "../device.c"
+#include <assert.h>
+#include <stdio.h>
+
+void lock(struct lock *l)
+{
+ l->lock_val++;
+}
+
+void unlock(struct lock *l)
+{
+ l->lock_val--;
+}
+
+#define TEST_HEAP_ORDER 12
+#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER)
+
+static void add_mem_node(uint64_t start, uint64_t len)
+{
+ struct dt_node *mem;
+ u64 reg[2];
+ char name[sizeof("memory@") + STR_MAX_CHARS(reg[0])];
+
+ /* reg contains start and length */
+ reg[0] = cpu_to_be64(start);
+ reg[1] = cpu_to_be64(len);
+
+	sprintf(name, "memory@%llx", (unsigned long long)start);
+
+ mem = dt_new(dt_root, name);
+ dt_add_property_string(mem, "device_type", "memory");
+ dt_add_property(mem, "reg", reg, sizeof(reg));
+}
+
+void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused)))
+{
+}
+
+int main(void)
+{
+ uint64_t i;
+ struct mem_region *r;
+ const char *last;
+
+ /* Use malloc for the heap, so valgrind can find issues. */
+ skiboot_heap.start = (unsigned long)malloc(TEST_HEAP_SIZE);
+ skiboot_heap.len = TEST_HEAP_SIZE;
+ skiboot_os_reserve.len = skiboot_heap.start;
+
+ dt_root = dt_new_root("");
+ dt_add_property_cells(dt_root, "#address-cells", 2);
+ dt_add_property_cells(dt_root, "#size-cells", 2);
+
+ add_mem_node(0, 0x100000000ULL);
+ add_mem_node(0x100000000ULL, 0x100000000ULL);
+
+ mem_region_init();
+
+ mem_region_release_unused();
+
+ assert(mem_check(&skiboot_heap));
+
+ /* Now we expect it to be split. */
+ i = 0;
+ list_for_each(&regions, r, list) {
+ assert(mem_check(r));
+ i++;
+ if (r == &skiboot_os_reserve)
+ continue;
+ if (r == &skiboot_code_and_text)
+ continue;
+ if (r == &skiboot_heap)
+ continue;
+ if (r == &skiboot_after_heap)
+ continue;
+ if (r == &skiboot_cpu_stacks)
+ continue;
+
+ /* the memory nodes should all be available to the OS now */
+ assert(r->type == REGION_OS);
+ }
+ assert(i == 9);
+
+ last = NULL;
+ list_for_each(&regions, r, list) {
+ if (last != r->name &&
+ strncmp(r->name, NODE_REGION_PREFIX,
+ strlen(NODE_REGION_PREFIX)) == 0) {
+ /* It's safe to cast away the const as
+ * this never happens at runtime,
+ * only in test and only for valgrind
+ */
+ free((void*)r->name);
+ }
+ last = r->name;
+ }
+
+ dt_free(dt_root);
+ free((void *)(long)skiboot_heap.start);
+ return 0;
+}
diff --git a/core/test/run-msg.c b/core/test/run-msg.c
new file mode 100644
index 0000000..cd36408
--- /dev/null
+++ b/core/test/run-msg.c
@@ -0,0 +1,256 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <skiboot.h>
+#include <inttypes.h>
+#include <assert.h>
+
+static bool zalloc_should_fail = false;
+static void *zalloc(size_t size)
+{
+ if (zalloc_should_fail) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ return calloc(size, 1);
+}
+
+#include "../opal-msg.c"
+
+void lock(struct lock *l)
+{
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values)
+{
+ (void)evt_mask;
+ (void)evt_values;
+}
+
+static long magic = 8097883813087437089UL;
+static void callback(void *data)
+{
+ assert(*(uint64_t *)data == magic);
+}
+
+static size_t list_count(struct list_head *list)
+{
+ size_t count = 0;
+ struct opal_msg_entry *dummy;
+
+ list_for_each(list, dummy, link)
+ count++;
+ return count;
+}
+
+int main(void)
+{
+ struct opal_msg_entry* entry;
+ int free_size = OPAL_MAX_MSGS;
+ int nfree = free_size;
+ int npending = 0;
+ int r;
+ static struct opal_msg m;
+ uint64_t *m_ptr = (uint64_t *)&m;
+
+ opal_init_msg();
+
+ assert(list_count(&msg_pending_list) == npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* Callback. */
+ r = opal_queue_msg(0, &magic, callback, (u64)0, (u64)1, (u64)2);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ assert(m.params[0] == 0);
+ assert(m.params[1] == 1);
+ assert(m.params[2] == 2);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == ++nfree);
+
+ /* No params. */
+ r = opal_queue_msg(0, NULL, NULL);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == ++nfree);
+
+	/* > 8 params (more than ARRAY_SIZE(entry->msg.params)) */
+ r = opal_queue_msg(0, NULL, NULL, 0, 1, 2, 3, 4, 5, 6, 7, 0xBADDA7A);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == ++nfree);
+
+ assert(m.params[0] == 0);
+ assert(m.params[1] == 1);
+ assert(m.params[2] == 2);
+ assert(m.params[3] == 3);
+ assert(m.params[4] == 4);
+ assert(m.params[5] == 5);
+ assert(m.params[6] == 6);
+ assert(m.params[7] == 7);
+
+	/* 8 params (exactly ARRAY_SIZE(entry->msg.params)) */
+ r = opal_queue_msg(0, NULL, NULL, 0, 10, 20, 30, 40, 50, 60, 70);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == --npending);
+ assert(list_count(&msg_free_list) == ++nfree);
+
+ assert(m.params[0] == 0);
+ assert(m.params[1] == 10);
+ assert(m.params[2] == 20);
+ assert(m.params[3] == 30);
+ assert(m.params[4] == 40);
+ assert(m.params[5] == 50);
+ assert(m.params[6] == 60);
+ assert(m.params[7] == 70);
+
+ /* Full list (no free nodes in pending). */
+ while (nfree > 0) {
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == 0);
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+ }
+ assert(list_count(&msg_free_list) == 0);
+ assert(nfree == 0);
+ assert(npending == OPAL_MAX_MSGS);
+
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1);
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* Make zalloc fail to test error handling. */
+ zalloc_should_fail = true;
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == OPAL_RESOURCE);
+
+ assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1);
+ assert(list_count(&msg_pending_list) == npending);
+ assert(list_count(&msg_free_list) == nfree);
+
+ /* Empty list (no nodes). */
+ while(!list_empty(&msg_pending_list)) {
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+ npending--;
+ nfree++;
+ }
+ assert(list_count(&msg_pending_list) == npending);
+ assert(list_count(&msg_free_list) == nfree);
+ assert(npending == 0);
+ assert(nfree == OPAL_MAX_MSGS+1);
+
+ r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL);
+ assert(r == 0);
+
+ assert(list_count(&msg_pending_list) == ++npending);
+ assert(list_count(&msg_free_list) == --nfree);
+
+ /* Request invalid size. */
+ r = opal_get_msg(m_ptr, sizeof(m) - 1);
+ assert(r == OPAL_PARAMETER);
+
+ /* Pass null buffer. */
+ r = opal_get_msg(NULL, sizeof(m));
+ assert(r == OPAL_PARAMETER);
+
+ /* Get msg when none are pending. */
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == 0);
+
+ r = opal_get_msg(m_ptr, sizeof(m));
+ assert(r == OPAL_RESOURCE);
+
+#define test_queue_num(type, val) \
+ r = opal_queue_msg(0, NULL, NULL, \
+ (type)val, (type)val, (type)val, (type)val, \
+ (type)val, (type)val, (type)val, (type)val); \
+ assert(r == 0); \
+	r = opal_get_msg(m_ptr, sizeof(m)); \
+ assert(r == OPAL_SUCCESS); \
+ assert(m.params[0] == (type)val); \
+ assert(m.params[1] == (type)val); \
+ assert(m.params[2] == (type)val); \
+ assert(m.params[3] == (type)val); \
+ assert(m.params[4] == (type)val); \
+ assert(m.params[5] == (type)val); \
+ assert(m.params[6] == (type)val); \
+ assert(m.params[7] == (type)val)
+
+ /* Test types of various widths */
+ test_queue_num(u64, -1);
+ test_queue_num(s64, -1);
+ test_queue_num(u32, -1);
+ test_queue_num(s32, -1);
+ test_queue_num(u16, -1);
+ test_queue_num(s16, -1);
+ test_queue_num(u8, -1);
+ test_queue_num(s8, -1);
+
+ /* Clean up the list to keep valgrind happy. */
+ while(!list_empty(&msg_free_list)) {
+ entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+ assert(entry);
+ free(entry);
+ }
+
+ while(!list_empty(&msg_pending_list)) {
+ entry = list_pop(&msg_pending_list, struct opal_msg_entry, link);
+ assert(entry);
+ free(entry);
+ }
+
+ return 0;
+}
diff --git a/core/test/run-trace.c b/core/test/run-trace.c
new file mode 100644
index 0000000..7dabebd
--- /dev/null
+++ b/core/test/run-trace.c
@@ -0,0 +1,386 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <sched.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+/* Don't include these: PPC-specific */
+#define __CPU_H
+#define __TIME_H
+#define __PROCESSOR_H
+
+#if defined(__i386__) || defined(__x86_64__)
+/* This is more than a lwsync, but it'll work */
+static void full_barrier(void)
+{
+ asm volatile("mfence" : : : "memory");
+}
+#define lwsync full_barrier
+#define sync full_barrier
+#else
+#error "Define sync & lwsync for this arch"
+#endif
+
+#define zalloc(size) calloc((size), 1)
+
+struct cpu_thread {
+ uint32_t pir;
+ uint32_t chip_id;
+ struct trace_info *trace;
+ int server_no;
+ bool is_secondary;
+ struct cpu_thread *primary;
+};
+static struct cpu_thread *this_cpu(void);
+
+#define CPUS 4
+
+static struct cpu_thread fake_cpus[CPUS];
+
+static inline struct cpu_thread *next_cpu(struct cpu_thread *cpu)
+{
+ if (cpu == NULL)
+ return &fake_cpus[0];
+ cpu++;
+ if (cpu == &fake_cpus[CPUS])
+ return NULL;
+ return cpu;
+}
+
+#define first_cpu() next_cpu(NULL)
+
+#define for_each_cpu(cpu) \
+ for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu))
+
+static unsigned long timestamp;
+static unsigned long mftb(void)
+{
+ return timestamp;
+}
+
+static void *local_alloc(unsigned int chip_id,
+ size_t size, size_t align)
+{
+ void *p;
+
+ (void)chip_id;
+ if (posix_memalign(&p, align, size))
+ p = NULL;
+ return p;
+}
+
+struct dt_node;
+extern struct dt_node *opal_node;
+
+#include "../trace.c"
+
+#define rmb() lwsync()
+
+#include "../external/trace.c"
+#include "../device.c"
+
+char __rodata_start[1], __rodata_end[1];
+struct dt_node *opal_node;
+struct debug_descriptor debug_descriptor = {
+ .trace_mask = -1
+};
+
+void lock(struct lock *l)
+{
+ assert(!l->lock_val);
+ l->lock_val = 1;
+}
+
+void unlock(struct lock *l)
+{
+ assert(l->lock_val);
+ l->lock_val = 0;
+}
+
+struct cpu_thread *my_fake_cpu;
+static struct cpu_thread *this_cpu(void)
+{
+ return my_fake_cpu;
+}
+
+#include <sys/mman.h>
+#define PER_CHILD_TRACES (1024*1024)
+
+static void write_trace_entries(int id)
+{
+ void exit(int);
+ unsigned int i;
+ union trace trace;
+
+ timestamp = id;
+ for (i = 0; i < PER_CHILD_TRACES; i++) {
+ timestamp = i * CPUS + id;
+ assert(sizeof(trace.hdr) % 8 == 0);
+ /* First child never repeats, second repeats once, etc. */
+ trace_add(&trace, 3 + ((i / (id + 1)) % 0x40),
+ sizeof(trace.hdr));
+ }
+
+ /* Final entry has special type, so parent knows it's over. */
+ trace_add(&trace, 0x70, sizeof(trace.hdr));
+ exit(0);
+}
+
+static bool all_done(const bool done[])
+{
+ unsigned int i;
+
+ for (i = 0; i < CPUS; i++)
+ if (!done[i])
+ return false;
+ return true;
+}
+
+static void test_parallel(void)
+{
+ void *p;
+ unsigned int i, counts[CPUS] = { 0 }, overflows[CPUS] = { 0 };
+ unsigned int repeats[CPUS] = { 0 }, num_overflows[CPUS] = { 0 };
+ bool done[CPUS] = { false };
+ size_t len = sizeof(struct trace_info) + TBUF_SZ + sizeof(union trace);
+ int last = 0;
+
+ /* Use a shared mmap to test actual parallel buffers. */
+ i = (CPUS*len + getpagesize()-1)&~(getpagesize()-1);
+ p = mmap(NULL, i, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_SHARED, -1, 0);
+
+ for (i = 0; i < CPUS; i++) {
+ fake_cpus[i].trace = p + i * len;
+ fake_cpus[i].trace->tb.mask = TBUF_SZ - 1;
+ fake_cpus[i].trace->tb.max_size = sizeof(union trace);
+ fake_cpus[i].is_secondary = false;
+ }
+
+ for (i = 0; i < CPUS; i++) {
+ if (!fork()) {
+ /* Child. */
+ my_fake_cpu = &fake_cpus[i];
+ write_trace_entries(i);
+ }
+ }
+
+ while (!all_done(done)) {
+ union trace t;
+
+ for (i = 0; i < CPUS; i++) {
+ if (trace_get(&t, &fake_cpus[(i+last) % CPUS].trace->tb))
+ break;
+ }
+
+ if (i == CPUS) {
+ sched_yield();
+ continue;
+ }
+ i = (i + last) % CPUS;
+ last = i;
+
+ assert(t.hdr.cpu < CPUS);
+ assert(!done[t.hdr.cpu]);
+
+ if (t.hdr.type == TRACE_OVERFLOW) {
+ /* Conveniently, each record is 16 bytes here. */
+ assert(t.overflow.bytes_missed % 16 == 0);
+ overflows[i] += t.overflow.bytes_missed / 16;
+ num_overflows[i]++;
+ continue;
+ }
+
+ assert(t.hdr.timestamp % CPUS == t.hdr.cpu);
+ if (t.hdr.type == TRACE_REPEAT) {
+ assert(t.hdr.len_div_8 * 8 == sizeof(t.repeat));
+ assert(t.repeat.num != 0);
+ assert(t.repeat.num <= t.hdr.cpu);
+ repeats[t.hdr.cpu] += t.repeat.num;
+ } else if (t.hdr.type == 0x70) {
+ done[t.hdr.cpu] = true;
+ } else {
+ counts[t.hdr.cpu]++;
+ }
+ }
+
+ /* Gather children. */
+ for (i = 0; i < CPUS; i++) {
+ int status;
+ wait(&status);
+ }
+
+ for (i = 0; i < CPUS; i++) {
+ printf("Child %i: %u produced, %u overflows, %llu total\n", i,
+ counts[i], overflows[i],
+ (long long)fake_cpus[i].trace->tb.end);
+ assert(counts[i] + repeats[i] <= PER_CHILD_TRACES);
+ }
+ /* Child 0 never repeats. */
+ assert(repeats[0] == 0);
+ assert(counts[0] + overflows[0] == PER_CHILD_TRACES);
+
+ /*
+ * FIXME: Other children have some fuzz, since overflows may
+ * include a repeat record we already read. And odd-numbered
+ * overflows may include more repeat records than normal
+ * records (they alternate).
+ */
+}
+
+int main(void)
+{
+ union trace minimal;
+ union trace large;
+ union trace trace;
+ unsigned int i, j;
+
+ opal_node = dt_new_root("opal");
+ for (i = 0; i < CPUS; i++) {
+ fake_cpus[i].server_no = i;
+ fake_cpus[i].is_secondary = (i & 0x1);
+ fake_cpus[i].primary = &fake_cpus[i & ~0x1];
+ }
+ init_trace_buffers();
+ my_fake_cpu = &fake_cpus[0];
+
+ for (i = 0; i < CPUS; i++) {
+ assert(trace_empty(&fake_cpus[i].trace->tb));
+ assert(!trace_get(&trace, &fake_cpus[i].trace->tb));
+ }
+
+ assert(sizeof(trace.hdr) % 8 == 0);
+ timestamp = 1;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.timestamp == timestamp);
+
+ /* Make it wrap once. */
+ for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8) + 1; i++) {
+ timestamp = i;
+ trace_add(&minimal, 99 + (i%2), sizeof(trace.hdr));
+ }
+
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ /* First one must be an overflow marker. */
+ assert(trace.hdr.type == TRACE_OVERFLOW);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.overflow));
+ assert(trace.overflow.bytes_missed == minimal.hdr.len_div_8 * 8);
+
+ for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8); i++) {
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.timestamp == i+1);
+ assert(trace.hdr.type == 99 + ((i+1)%2));
+ }
+ assert(!trace_get(&trace, &my_fake_cpu->trace->tb));
+
+ /* Now put in some weird-length ones, to test overlap.
+ * Use the largest power-of-2 length that still fits in an entry. */
+ for (j = 0; (1 << j) < sizeof(large); j++);
+ for (i = 0; i < TBUF_SZ; i++) {
+ timestamp = i;
+ trace_add(&large, 100 + (i%2), (1 << (j-1)));
+ }
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.type == TRACE_OVERFLOW);
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.len_div_8 == large.hdr.len_div_8);
+ i = trace.hdr.timestamp;
+ while (trace_get(&trace, &my_fake_cpu->trace->tb))
+ assert(trace.hdr.timestamp == ++i);
+
+ /* Test repeats. */
+ for (i = 0; i < 65538; i++) {
+ timestamp = i;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ }
+ timestamp = i;
+ trace_add(&minimal, 101, sizeof(trace.hdr));
+ timestamp = i+1;
+ trace_add(&minimal, 101, sizeof(trace.hdr));
+
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.timestamp == 0);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 100);
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(trace.repeat.num == 65535);
+ assert(trace.repeat.timestamp == 65535);
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.timestamp == 65536);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 100);
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(trace.repeat.num == 1);
+ assert(trace.repeat.timestamp == 65537);
+
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.timestamp == 65538);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 101);
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(trace.repeat.num == 1);
+ assert(trace.repeat.timestamp == 65539);
+
+ /* Now, test adding a repeat while we're reading... */
+ timestamp = 0;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ assert(trace.hdr.timestamp == 0);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ assert(trace.hdr.type == 100);
+
+ for (i = 1; i < TBUF_SZ; i++) {
+ timestamp = i;
+ trace_add(&minimal, 100, sizeof(trace.hdr));
+ assert(trace_get(&trace, &my_fake_cpu->trace->tb));
+ if (i % 65536 == 0) {
+ assert(trace.hdr.type == 100);
+ assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8);
+ } else {
+ assert(trace.hdr.type == TRACE_REPEAT);
+ assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat));
+ assert(trace.repeat.num == 1);
+ }
+ assert(trace.repeat.timestamp == i);
+ assert(!trace_get(&trace, &my_fake_cpu->trace->tb));
+ }
+
+ for (i = 0; i < CPUS; i++)
+ if (!fake_cpus[i].is_secondary)
+ free(fake_cpus[i].trace);
+
+ test_parallel();
+
+ return 0;
+}
diff --git a/core/test/stubs.c b/core/test/stubs.c
new file mode 100644
index 0000000..3233455
--- /dev/null
+++ b/core/test/stubs.c
@@ -0,0 +1,43 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Add any stub functions required for linking here. */
+#include <stdlib.h>
+
+static void stub_function(void)
+{
+ abort();
+}
+
+#define STUB(fnname) \
+ void fnname(void) __attribute__((weak, alias ("stub_function")))
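+
+/* For example, STUB(fdt_begin_node) expands to
+ *   void fdt_begin_node(void) __attribute__((weak, alias ("stub_function")));
+ * so an otherwise-unresolved call to it lands in stub_function() and aborts. */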
+
+STUB(fdt_begin_node);
+STUB(fdt_property);
+STUB(fdt_end_node);
+STUB(fdt_create);
+STUB(fdt_add_reservemap_entry);
+STUB(fdt_finish_reservemap);
+STUB(fdt_strerror);
+STUB(fdt_check_header);
+STUB(_fdt_check_node_offset);
+STUB(fdt_next_tag);
+STUB(fdt_string);
+STUB(fdt_get_name);
+STUB(dt_first);
+STUB(dt_next);
+STUB(dt_has_node_property);
+STUB(dt_get_address);
+STUB(add_chip_dev_associativity);
diff --git a/core/timebase.c b/core/timebase.c
new file mode 100644
index 0000000..d51e96b
--- /dev/null
+++ b/core/timebase.c
@@ -0,0 +1,67 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <timebase.h>
+#include <fsp.h>
+
+void time_wait(unsigned long duration)
+{
+ unsigned long end = mftb() + duration;
+
+ while(tb_compare(mftb(), end) != TB_AAFTERB)
+ fsp_poll();
+}
+
+void time_wait_ms(unsigned long ms)
+{
+ time_wait(msecs_to_tb(ms));
+}
+
+void time_wait_us(unsigned long us)
+{
+ time_wait(usecs_to_tb(us));
+}
+
+unsigned long timespec_to_tb(const struct timespec *ts)
+{
+ unsigned long ns;
+
+ /* First convert to ns */
+ ns = ts->tv_sec * 1000000000ul;
+ ns += ts->tv_nsec;
+
+ /*
+ * This is a very rough approximation; it works provided
+ * we never pass overly long delays here and the TB
+ * frequency isn't significantly lower than 512MHz.
+ *
+ * We could improve the precision by shifting fewer bits
+ * at the expense of range, or by doing 128-bit math, which
+ * I'm not eager to do :-)
+ */
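+ /*
+ * Illustrative numbers (not from the original source): with
+ * tb_hz = 512000000, tb_hz >> 24 == 30 and 1000000000ul >> 24 == 59,
+ * so 1ms (10^6 ns) maps to 10^6 * 30 / 59 = 508474 ticks versus an
+ * exact 512000, an error of well under 1%.
+ */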
+ return (ns * (tb_hz >> 24)) / (1000000000ul >> 24);
+}
+
+int nanosleep(const struct timespec *req, struct timespec *rem)
+{
+ time_wait(timespec_to_tb(req));
+
+ if (rem) {
+ rem->tv_sec = 0;
+ rem->tv_nsec = 0;
+ }
+ return 0;
+}
diff --git a/core/trace.c b/core/trace.c
new file mode 100644
index 0000000..76f3c30
--- /dev/null
+++ b/core/trace.c
@@ -0,0 +1,244 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <trace.h>
+#include <timebase.h>
+#include <lock.h>
+#include <string.h>
+#include <stdlib.h>
+#include <cpu.h>
+#include <device.h>
+#include <libfdt.h>
+#include <processor.h>
+#include <skiboot.h>
+
+#define DEBUG_TRACES
+
+#define MAX_SIZE (sizeof(union trace) + 7)
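+/* The "+ 7" leaves room to round an entry's length up to the next multiple
+ * of 8, since lengths are stored as len_div_8. */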
+
+/* Smaller trace buffer for early booting */
+#define BOOT_TBUF_SZ 65536
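+/* Buffer sizes must be powers of two: positions wrap with (pos & mask)
+ * and mask is set to size - 1. */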
+static struct {
+ struct trace_info trace_info;
+ char buf[BOOT_TBUF_SZ + MAX_SIZE];
+} boot_tracebuf __section(".data.boot_trace") = {
+ .trace_info = {
+ .lock = LOCK_UNLOCKED,
+ .tb = {
+ .mask = BOOT_TBUF_SZ - 1,
+ .max_size = MAX_SIZE
+ },
+ },
+ .buf = { 0 }
+};
+
+void init_boot_tracebuf(struct cpu_thread *boot_cpu)
+{
+ boot_cpu->trace = &boot_tracebuf.trace_info;
+}
+
+static size_t tracebuf_extra(void)
+{
+ /* We make room for the largest possible record */
+ return TBUF_SZ + MAX_SIZE;
+}
+
+/* To avoid bloating each entry, runs of identical entries are collapsed
+ * into dedicated TRACE_REPEAT records.  tb->last points to the last
+ * (non-repeat) entry. */
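+/* Example (illustrative): three identical trace_add() calls leave the buffer
+ * holding the original entry followed by a single TRACE_REPEAT record with
+ * num == 2 and the timestamp of the latest duplicate. */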
+static bool handle_repeat(struct tracebuf *tb, const union trace *trace)
+{
+ struct trace_hdr *prev;
+ struct trace_repeat *rpt;
+ u32 len;
+
+ prev = (void *)tb->buf + (tb->last & tb->mask);
+
+ if (prev->type != trace->hdr.type
+ || prev->len_div_8 != trace->hdr.len_div_8
+ || prev->cpu != trace->hdr.cpu)
+ return false;
+
+ len = prev->len_div_8 << 3;
+ if (memcmp(prev + 1, &trace->hdr + 1, len - sizeof(*prev)) != 0)
+ return false;
+
+ /* If the reader has already consumed the prev entry, don't repeat. */
+ if (tb->last < tb->start)
+ return false;
+
+ /* OK, it's a duplicate. Do we already have a repeat record? */
+ if (tb->last + len != tb->end) {
+ /* FIXME: Reader is not protected from seeing this! */
+ rpt = (void *)tb->buf + ((tb->last + len) & tb->mask);
+ assert(tb->last + len + rpt->len_div_8*8 == tb->end);
+ assert(rpt->type == TRACE_REPEAT);
+
+ /* If this repeat entry is full, don't repeat. */
+ if (rpt->num == 0xFFFF)
+ return false;
+
+ rpt->num++;
+ rpt->timestamp = trace->hdr.timestamp;
+ return true;
+ }
+
+ /*
+ * Generate a repeat entry: it's no larger than the entry we just
+ * made room for, so the space at the end is guaranteed to be free.
+ */
+ assert(trace->hdr.len_div_8 * 8 >= sizeof(*rpt));
+
+ rpt = (void *)tb->buf + (tb->end & tb->mask);
+ rpt->timestamp = trace->hdr.timestamp;
+ rpt->type = TRACE_REPEAT;
+ rpt->len_div_8 = sizeof(*rpt) >> 3;
+ rpt->cpu = trace->hdr.cpu;
+ rpt->prev_len = trace->hdr.len_div_8 << 3;
+ rpt->num = 1;
+ lwsync(); /* write barrier: complete repeat record before exposing */
+ tb->end += sizeof(*rpt);
+ return true;
+}
+
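+/* Append one entry to the current cpu's trace buffer: make room by
+ * discarding the oldest entries, then either fold the new entry into a
+ * repeat record or copy it in and advance tb.end. */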
+void trace_add(union trace *trace, u8 type, u16 len)
+{
+ struct trace_info *ti = this_cpu()->trace;
+ unsigned int tsz;
+
+ trace->hdr.type = type;
+ trace->hdr.len_div_8 = (len + 7) >> 3;
+
+ tsz = trace->hdr.len_div_8 << 3;
+
+#ifdef DEBUG_TRACES
+ assert(tsz >= sizeof(trace->hdr));
+ assert(tsz <= sizeof(*trace));
+ assert(trace->hdr.type != TRACE_REPEAT);
+ assert(trace->hdr.type != TRACE_OVERFLOW);
+#endif
+ /* Skip traces not enabled in the debug descriptor */
+ if (!((1ul << trace->hdr.type) & debug_descriptor.trace_mask))
+ return;
+
+ trace->hdr.timestamp = mftb();
+ trace->hdr.cpu = this_cpu()->server_no;
+
+ lock(&ti->lock);
+
+ /* Throw away old entries before we overwrite them. */
+ while ((ti->tb.start + ti->tb.mask + 1) < (ti->tb.end + tsz)) {
+ struct trace_hdr *hdr;
+
+ hdr = (void *)ti->tb.buf + (ti->tb.start & ti->tb.mask);
+ ti->tb.start += hdr->len_div_8 << 3;
+ }
+
+ /* Must make the ->start update visible before we overwrite old entries. */
+ lwsync(); /* write barrier */
+
+ /* Check for duplicates... */
+ if (!handle_repeat(&ti->tb, trace)) {
+ /* This may run off the end, which is why ti->tb.buf is oversized */
+ memcpy(ti->tb.buf + (ti->tb.end & ti->tb.mask), trace, tsz);
+ ti->tb.last = ti->tb.end;
+ lwsync(); /* write barrier: write entry before exposing */
+ ti->tb.end += tsz;
+ }
+ unlock(&ti->lock);
+}
+
+static void trace_add_dt_props(void)
+{
+ unsigned int i;
+ u64 *prop, tmask;
+
+ prop = malloc(sizeof(u64) * 2 * debug_descriptor.num_traces);
+
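+ /* The property is a list of (physical address, size) pairs of
+ * big-endian u64s, one pair per registered trace buffer. */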
+ for (i = 0; i < debug_descriptor.num_traces; i++) {
+ prop[i * 2] = cpu_to_fdt64(debug_descriptor.trace_phys[i]);
+ prop[i * 2 + 1] = cpu_to_fdt64(debug_descriptor.trace_size[i]);
+ }
+
+ dt_add_property(opal_node, "ibm,opal-traces",
+ prop, sizeof(u64) * 2 * i);
+ free(prop);
+
+ tmask = (uint64_t)&debug_descriptor.trace_mask;
+ dt_add_property_cells(opal_node, "ibm,opal-trace-mask",
+ hi32(tmask), lo32(tmask));
+}
+
+static void trace_add_desc(struct trace_info *t, uint64_t size)
+{
+ unsigned int i = debug_descriptor.num_traces;
+
+ if (i >= DEBUG_DESC_MAX_TRACES) {
+ prerror("TRACE: Debug descriptor trace list full !\n");
+ return;
+ }
+ debug_descriptor.num_traces++;
+
+ debug_descriptor.trace_phys[i] = (uint64_t)&t->tb;
+ debug_descriptor.trace_tce[i] = 0; /* populated later */
+ debug_descriptor.trace_size[i] = size;
+}
+
+/* Allocate trace buffers once we know memory topology */
+void init_trace_buffers(void)
+{
+ struct cpu_thread *t;
+ struct trace_info *any = &boot_tracebuf.trace_info;
+ uint64_t size;
+
+ /* Register the boot trace buffer in the debug descriptor */
+ trace_add_desc(any, sizeof(boot_tracebuf.buf));
+
+ /* Allocate a trace buffer for each primary cpu. */
+ for_each_cpu(t) {
+ if (t->is_secondary)
+ continue;
+
+ /* Use a 4K alignment for TCE mapping */
+ size = ALIGN_UP(sizeof(*t->trace) + tracebuf_extra(), 0x1000);
+ t->trace = local_alloc(t->chip_id, size, 0x1000);
+ if (t->trace) {
+ any = t->trace;
+ memset(t->trace, 0, size);
+ init_lock(&t->trace->lock);
+ t->trace->tb.mask = TBUF_SZ - 1;
+ t->trace->tb.max_size = MAX_SIZE;
+ trace_add_desc(any, sizeof(t->trace->tb) +
+ tracebuf_extra());
+ } else
+ prerror("TRACE: cpu 0x%x allocation failed\n", t->pir);
+ }
+
+ /* In case any allocations failed, share trace buffers. */
+ for_each_cpu(t) {
+ if (!t->is_secondary && !t->trace)
+ t->trace = any;
+ }
+
+ /* And copy those to the secondaries. */
+ for_each_cpu(t) {
+ if (!t->is_secondary)
+ continue;
+ t->trace = t->primary->trace;
+ }
+
+ /* Describe the trace buffers in the device tree. */
+ trace_add_dt_props();
+}
diff --git a/core/utils.c b/core/utils.c
new file mode 100644
index 0000000..2bc57b1
--- /dev/null
+++ b/core/utils.c
@@ -0,0 +1,59 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <lock.h>
+#include <fsp.h>
+#include <processor.h>
+
+void abort(void)
+{
+ static bool in_abort = false;
+ unsigned long hid0;
+
+ if (in_abort)
+ for (;;) ;
+ in_abort = true;
+
+ bust_locks = true;
+
+ op_display(OP_FATAL, OP_MOD_CORE, 0x6666);
+
+ fputs("Aborting!\n", stderr);
+ backtrace();
+
+ /* XXX FIXME: We should fsp_poll for a while to ensure any pending
+ * console writes have made it out, but until we have decent PSI
+ * link handling we must not do it forever. Polling can prevent the
+ * FSP from bringing the PSI link up and it can get stuck in a
+ * reboot loop.
+ */
+
+ hid0 = mfspr(SPR_HID0);
+ hid0 |= SPR_HID0_ENABLE_ATTN;
+ set_hid0(hid0);
+ trigger_attn();
+ for (;;) ;
+}
+
+char __attrconst tohex(uint8_t nibble)
+{
+ static const char __tohex[] = {'0','1','2','3','4','5','6','7','8','9',
+ 'A','B','C','D','E','F'};
+ if (nibble > 0xf)
+ return '?';
+ return __tohex[nibble];
+}
diff --git a/core/vpd.c b/core/vpd.c
new file mode 100644
index 0000000..deb552c
--- /dev/null
+++ b/core/vpd.c
@@ -0,0 +1,211 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <vpd.h>
+#include <string.h>
+#include <fsp.h>
+#include <device.h>
+
+#define CHECK_SPACE(_p, _n, _e) (((_e) - (_p)) >= (_n))
+
+/* Low level keyword search in a record. Can be used when we
+ * need to find the next keyword of a given type, for example
+ * when a record has multiple MF/SM keyword pairs.
+ */
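+/* Keywords are encoded as two name bytes, a one-byte length, then the data;
+ * e.g. (illustrative) the bytes 'R' 'T' 0x04 "VINI" form an RT keyword
+ * naming the VINI record. */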
+const void *vpd_find_keyword(const void *rec, size_t rec_sz,
+ const char *kw, uint8_t *kw_size)
+{
+ const uint8_t *p = rec, *end = rec + rec_sz;
+
+ while (CHECK_SPACE(p, 3, end)) {
+ uint8_t k1 = *(p++);
+ uint8_t k2 = *(p++);
+ uint8_t sz = *(p++);
+
+ if (k1 == kw[0] && k2 == kw[1]) {
+ if (kw_size)
+ *kw_size = sz;
+ return p;
+ }
+ p += sz;
+ }
+ return NULL;
+}
+
+/* Locate a record in a VPD blob
+ *
+ * Note: This works with VPD LIDs. It will scan until it finds
+ * the first 0x84, so it will skip all those 0's that the VPD
+ * LIDs seem to contain
+ */
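+/* As parsed below, a record looks like: 0x84, a little-endian 16-bit record
+ * size, <size> bytes of keywords (including an RT keyword holding the record
+ * name), then a trailing 0x78 byte. */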
+const void *vpd_find_record(const void *vpd, size_t vpd_size,
+ const char *record, size_t *sz)
+{
+ const uint8_t *p = vpd, *end = vpd + vpd_size;
+ bool first_start = true;
+ size_t rec_sz;
+ uint8_t namesz = 0;
+ const char *rec_name;
+
+ while (CHECK_SPACE(p, 4, end)) {
+ /* Get header byte */
+ if (*(p++) != 0x84) {
+ /* Skip initial crap in VPD LIDs */
+ if (first_start)
+ continue;
+ break;
+ }
+ first_start = false;
+ rec_sz = *(p++);
+ rec_sz |= *(p++) << 8;
+ if (!CHECK_SPACE(p, rec_sz, end)) {
+ prerror("VPD: Malformed or truncated VPD,"
+ " record size doesn't fit\n");
+ return NULL;
+ }
+
+ /* Find record name */
+ rec_name = vpd_find_keyword(p, rec_sz, "RT", &namesz);
+ if (rec_name && strncmp(record, rec_name, namesz) == 0) {
+ *sz = rec_sz;
+ return p;
+ }
+
+ p += rec_sz;
+ if (*(p++) != 0x78) {
+ prerror("VPD: Malformed or truncated VPD,"
+ " missing final 0x78 in record %.4s\n",
+ rec_name ? rec_name : "????");
+ return NULL;
+ }
+ }
+ return NULL;
+}
+
+/* Locate a keyword in a record in a VPD blob
+ *
+ * Note: This works with VPD LIDs. It will scan until it finds
+ * the first 0x84, so it will skip all those 0's that the VPD
+ * LIDs seem to contain
+ */
+const void *vpd_find(const void *vpd, size_t vpd_size,
+ const char *record, const char *keyword,
+ uint8_t *sz)
+{
+ size_t rec_sz;
+ const uint8_t *p;
+
+ p = vpd_find_record(vpd, vpd_size, record, &rec_sz);
+ if (p)
+ p = vpd_find_keyword(p, rec_sz, keyword, sz);
+ return p;
+}
+
+/* Helper to load a VPD LID. Pass a ptr to the corresponding LX keyword */
+static void *vpd_lid_load(const uint8_t *lx, uint8_t lxrn, size_t *size)
+{
+ /* Now this is a guessing game as we don't have the info from the
+ * pHyp folks. But basically, it seems to boil down to loading
+ * a LID whose name is 0x80e000yy where yy is the last 2 digits
+ * of the LX record in hex.
+ *
+ * [ Correction: After a chat with some folks, it looks like it's
+ * actually 4 digits, though the lid number is limited to fff
+ * so we weren't far off. ]
+ *
+ * For safety, we look for a matching LX record in an LXRn
+ * (n = lxrn argument) or in VINI if lxrn=0xff
+ */
+ uint32_t lid_no = 0x80e00000 | ((lx[6] & 0xf) << 8) | lx[7];
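+ /* For example (illustrative values): lx[6] == 0x02 and lx[7] == 0x34
+ * would give lid_no = 0x80e00234. */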
+
+ /* We don't quite know how to get to the LID directory so
+ * we don't know the size. Let's allocate 16K. All the VPD LIDs
+ * I've seen so far are much smaller.
+ */
+#define VPD_LID_MAX_SIZE 0x4000
+ void *data = malloc(VPD_LID_MAX_SIZE);
+ char record[4] = "LXR0";
+ const void *valid_lx;
+ uint8_t lx_size;
+ int rc;
+
+ if (!data) {
+ prerror("VPD: Failed to allocate memory for LID\n");
+ return NULL;
+ }
+
+ /* Adjust LID number for flash side */
+ lid_no = fsp_adjust_lid_side(lid_no);
+ printf("VPD: Trying to load VPD LID 0x%08x...\n", lid_no);
+
+ *size = VPD_LID_MAX_SIZE;
+
+ /* Load it from the FSP */
+ rc = fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid_no, 0, data, size);
+ if (rc) {
+ prerror("VPD: Error %d loading VPD LID\n", rc);
+ goto fail;
+ }
+
+ /* Validate it */
+ if (lxrn < 9)
+ record[3] = '0' + lxrn;
+ else
+ memcpy(record, "VINI", 4);
+
+ valid_lx = vpd_find(data, *size, record, "LX", &lx_size);
+ if (!valid_lx || lx_size != 8) {
+ prerror("VPD: Cannot find validation LX record\n");
+ goto fail;
+ }
+ if (memcmp(valid_lx, lx, 8) != 0) {
+ prerror("VPD: LX record mismatch !\n");
+ goto fail;
+ }
+
+ printf("VPD: Loaded %zu bytes\n", *size);
+
+ /* Got it ! */
+ return realloc(data, *size);
+ fail:
+ free(data);
+ return NULL;
+}
+
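+/* The "ibm,vpd-lx-info" property starts with a cell holding the LXRn index,
+ * followed by the 8-byte LX keyword value used to validate the LID. */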
+void vpd_iohub_load(struct dt_node *hub_node)
+{
+ void *vpd;
+ size_t sz;
+ const uint32_t *p;
+ unsigned int lx_idx;
+ const char *lxr;
+
+ p = dt_prop_get_def(hub_node, "ibm,vpd-lx-info", NULL);
+ if (!p)
+ return;
+
+ lx_idx = p[0];
+ lxr = (const char *)&p[1];
+
+ vpd = vpd_lid_load(lxr, lx_idx, &sz);
+ if (!vpd) {
+ prerror("VPD: Failed to load VPD LID\n");
+ } else {
+ dt_add_property(hub_node, "ibm,io-vpd", vpd, sz);
+ free(vpd);
+ }
+}