Diffstat (limited to 'core')
39 files changed, 11572 insertions, 0 deletions
diff --git a/core/Makefile.inc b/core/Makefile.inc
new file mode 100644
index 0000000..843ce05
--- /dev/null
+++ b/core/Makefile.inc
@@ -0,0 +1,12 @@
+# -*-Makefile-*-
+
+SUBDIRS += core
+CORE_OBJS = relocate.o console.o backtrace.o init.o chip.o mem_region.o
+CORE_OBJS += malloc.o lock.o cpu.o utils.o fdt.o opal.o interrupts.o
+CORE_OBJS += timebase.o opal-msg.o pci.o pci-opal.o fast-reboot.o
+CORE_OBJS += device.o exceptions.o trace.o affinity.o vpd.o
+CORE_OBJS += hostservices.o platform.o nvram.o flash-nvram.o
+CORE=core/built-in.o
+
+$(CORE): $(CORE_OBJS:%=core/%)
+
diff --git a/core/affinity.c b/core/affinity.c
new file mode 100644
index 0000000..d5eea82
--- /dev/null
+++ b/core/affinity.c
@@ -0,0 +1,132 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * We currently construct our associativity properties as such:
+ *
+ * - For "chip" devices (bridges, memory, ...), 4 entries:
+ *
+ *	- CCM node ID
+ *	- HW card ID
+ *	- HW module ID
+ *	- Chip ID
+ *
+ * The information is constructed based on the chip ID which (unlike
+ * pHyp) is our HW chip ID (aka "XSCOM" chip ID). We use it to retrieve
+ * the other properties from the corresponding chip/xscom node in the
+ * device-tree. If those properties are absent, 0 is used.
+ *
+ * - For "core" devices, we add a 5th entry:
+ *
+ *	- Core ID
+ *
+ * Here too, we do not use the "cooked" HW processor ID from HDAT but
+ * instead use the real HW core ID which is basically the interrupt
+ * server number of thread 0 on that core.
+ *
+ * The ibm,associativity-reference-points property is currently set to
+ * 4,4 indicating that the chip ID is our only reference point. This
+ * should be extended to encompass the node IDs eventually.
+ */
+#include <skiboot.h>
+#include <opal.h>
+#include <device.h>
+#include <console.h>
+#include <trace.h>
+#include <chip.h>
+#include <cpu.h>
+#include <affinity.h>
+
+static uint32_t get_chip_node_id(struct proc_chip *chip)
+{
+	/* If the xscom node has an ibm,ccm-node-id property, use it */
+	if (dt_has_node_property(chip->devnode, "ibm,ccm-node-id", NULL))
+		return dt_prop_get_u32(chip->devnode, "ibm,ccm-node-id");
+
+	/*
+	 * Else use the 3 top bits of the chip ID which should be
+	 * the node on both P7 and P8
+	 */
+	return chip->id >> 3;
+}
+
+void add_associativity_ref_point(void)
+{
+	int ref2 = 0x4;
+
+	/*
+	 * Note about our use of reference points:
+	 *
+	 * Linux currently supports two levels of NUMA. We use the first
+	 * reference point for the node ID and the second reference point
+	 * for a second level of affinity. We always use the chip ID (4)
+	 * for the first reference point.
+	 *
+	 * Choosing the second level of affinity is model specific
+	 * unfortunately. Current POWER8E models should use the DCM
+	 * as a second level of NUMA.
+	 *
+	 * If there is a way to obtain this information from the FSP
+	 * that would be ideal, but for now hardwire our POWER8E setting.
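+	 *
+	 * For illustration (editor's sketch, not part of this patch),
+	 * the property this ends up generating in the device-tree is:
+	 *
+	 *	ibm,associativity-reference-points = <0x4 0x4>	(default)
+	 *	ibm,associativity-reference-points = <0x4 0x3>	(POWER8E)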
+ */ + if (PVR_TYPE(mfspr(SPR_PVR)) == PVR_TYPE_P8E) + ref2 = 0x3; + + dt_add_property_cells(opal_node, "ibm,associativity-reference-points", + 0x4, ref2); +} + +void add_chip_dev_associativity(struct dt_node *dev) +{ + uint32_t chip_id = dt_get_chip_id(dev); + struct proc_chip *chip = get_chip(chip_id); + uint32_t hw_cid, hw_mid; + + if (!chip) + return; + + hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0); + hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0); + + dt_add_property_cells(dev, "ibm,associativity", 4, + get_chip_node_id(chip), + hw_cid, hw_mid, chip_id); +} + +void add_core_associativity(struct cpu_thread *cpu) +{ + struct proc_chip *chip = get_chip(cpu->chip_id); + uint32_t hw_cid, hw_mid, core_id; + + if (!chip) + return; + + if (proc_gen == proc_gen_p7) + core_id = (cpu->pir >> 2) & 0x7; + else if (proc_gen == proc_gen_p8) + core_id = (cpu->pir >> 3) & 0xf; + else + return; + + hw_cid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-card-id", 0); + hw_mid = dt_prop_get_u32_def(chip->devnode, "ibm,hw-module-id", 0); + + dt_add_property_cells(cpu->node, "ibm,associativity", 5, + get_chip_node_id(chip), + hw_cid, hw_mid, chip->id, core_id); +} diff --git a/core/backtrace.c b/core/backtrace.c new file mode 100644 index 0000000..3439db0 --- /dev/null +++ b/core/backtrace.c @@ -0,0 +1,41 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <skiboot.h> +#include <processor.h> +#include <cpu.h> + +void backtrace(void) +{ + unsigned int pir = mfspr(SPR_PIR); + unsigned long *sp; + unsigned long *bottom, *top; + + /* Check if there's a __builtin_something instead */ + asm("mr %0,1" : "=r" (sp)); + + bottom = cpu_stack_bottom(pir); + top = cpu_stack_top(pir); + + /* XXX Handle SMP */ + fprintf(stderr, "CPU %08x Backtrace:\n", pir); + while(sp > bottom && sp < top) { + fprintf(stderr, " S: %016lx R: %016lx\n", + (unsigned long)sp, sp[2]); + sp = (unsigned long *)sp[0]; + } +} diff --git a/core/chip.c b/core/chip.c new file mode 100644 index 0000000..e6eb81c --- /dev/null +++ b/core/chip.c @@ -0,0 +1,85 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +#include <skiboot.h> +#include <chip.h> +#include <device.h> + +static struct proc_chip *chips[MAX_CHIPS]; + +uint32_t pir_to_chip_id(uint32_t pir) +{ + if (proc_gen == proc_gen_p8) + return P8_PIR2GCID(pir); + else + return P7_PIR2GCID(pir); +} + +uint32_t pir_to_core_id(uint32_t pir) +{ + if (proc_gen == proc_gen_p8) + return P8_PIR2COREID(pir); + else + return P7_PIR2COREID(pir); +} + +uint32_t pir_to_thread_id(uint32_t pir) +{ + if (proc_gen == proc_gen_p8) + return P8_PIR2THREADID(pir); + else + return P7_PIR2THREADID(pir); +} + +struct proc_chip *next_chip(struct proc_chip *chip) +{ + unsigned int i; + + for (i = chip ? (chip->id + 1) : 0; i < MAX_CHIPS; i++) + if (chips[i]) + return chips[i]; + return NULL; +} + + +struct proc_chip *get_chip(uint32_t chip_id) +{ + return chips[chip_id]; +} + +void init_chips(void) +{ + struct proc_chip *chip; + struct dt_node *xn; + + /* We walk the chips based on xscom nodes in the tree */ + dt_for_each_compatible(dt_root, xn, "ibm,xscom") { + uint32_t id = dt_get_chip_id(xn); + + assert(id < MAX_CHIPS); + + chip = zalloc(sizeof(struct proc_chip)); + assert(chip); + chip->id = id; + chip->devnode = xn; + chips[id] = chip; + chip->dbob_id = dt_prop_get_u32_def(xn, "ibm,dbob-id", + 0xffffffff); + chip->pcid = dt_prop_get_u32_def(xn, "ibm,proc-chip-id", + 0xffffffff); + }; +} diff --git a/core/console.c b/core/console.c new file mode 100644 index 0000000..b291b1b --- /dev/null +++ b/core/console.c @@ -0,0 +1,334 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+/*
+ * Console IO routine for use by libc
+ *
+ * fd is the classic posix 0,1,2 (stdin, stdout, stderr)
+ */
+#include <skiboot.h>
+#include <unistd.h>
+#include <console.h>
+#include <opal.h>
+#include <device.h>
+#include <processor.h>
+#include <cpu.h>
+
+static char *con_buf = (char *)INMEM_CON_START;
+static size_t con_in;
+static size_t con_out;
+static bool con_wrapped;
+static struct con_ops *con_driver;
+
+struct lock con_lock = LOCK_UNLOCKED;
+
+/* This is mapped via TCEs so we keep it alone in a page */
+struct memcons memcons __section(".data.memcons") = {
+	.magic = MEMCONS_MAGIC,
+	.obuf_phys = INMEM_CON_START,
+	.ibuf_phys = INMEM_CON_START + INMEM_CON_OUT_LEN,
+	.obuf_size = INMEM_CON_OUT_LEN,
+	.ibuf_size = INMEM_CON_IN_LEN,
+};
+
+bool dummy_console_enabled(void)
+{
+#ifdef FORCE_DUMMY_CONSOLE
+	return true;
+#else
+	return dt_has_node_property(dt_chosen,
+				    "sapphire,enable-dummy-console", NULL);
+#endif
+}
+
+void force_dummy_console(void)
+{
+	dt_add_property(dt_chosen, "sapphire,enable-dummy-console", NULL, 0);
+}
+
+#ifdef MAMBO_CONSOLE
+static void mambo_write(const char *buf, size_t count)
+{
+#define SIM_WRITE_CONSOLE_CODE	0
+	register int c asm("r3") = 0; /* SIM_WRITE_CONSOLE_CODE */
+	register unsigned long a1 asm("r4") = (unsigned long)buf;
+	register unsigned long a2 asm("r5") = count;
+	register unsigned long a3 asm("r6") = 0;
+	asm volatile (".long 0x000eaeb0" : "=r"(c) : "r"(c), "r"(a1), "r"(a2),
+		      "r"(a3));
+}
+#else
+static void mambo_write(const char *buf __unused, size_t count __unused) { }
+#endif /* MAMBO_CONSOLE */
+
+void clear_console(void)
+{
+	memset(con_buf, 0, INMEM_CON_LEN);
+}
+
+/*
+ * Flush the console buffer into the driver, returns true
+ * if there is more to go
+ */
+bool __flush_console(void)
+{
+	struct cpu_thread *cpu = this_cpu();
+	size_t req, len = 0;
+	static bool in_flush, more_flush;
+
+	/* Is there anything to flush ? Bail out early if not */
+	if (con_in == con_out || !con_driver)
+		return false;
+
+	/*
+	 * Console flushing is suspended on this CPU, typically because
+	 * some critical locks are held that would potentially cause a
+	 * flush to deadlock
+	 */
+	if (cpu->con_suspend) {
+		cpu->con_need_flush = true;
+		return false;
+	}
+	cpu->con_need_flush = false;
+
+	/*
+	 * We must call the underlying driver with the console lock
+	 * dropped otherwise we get some deadlocks if anything down
+	 * that path tries to printf() something.
+	 *
+	 * So instead what we do is we keep a static in_flush flag
+	 * set/released with the lock held, which is used to prevent
+	 * concurrent attempts at flushing the same chunk of buffer
+	 * by other processors.
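+	 *
+	 * Example of the effect (editor's note, not in the original
+	 * patch): if CPU A is inside con_driver->write() with con_lock
+	 * dropped and CPU B calls printf(), B sees in_flush set, raises
+	 * more_flush and bails out; A then loops once more and flushes
+	 * B's new output itself.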
+	 */
+	if (in_flush) {
+		more_flush = true;
+		return false;
+	}
+	in_flush = true;
+
+	do {
+		more_flush = false;
+		if (con_out > con_in) {
+			req = INMEM_CON_OUT_LEN - con_out;
+			unlock(&con_lock);
+			len = con_driver->write(con_buf + con_out, req);
+			lock(&con_lock);
+			con_out = (con_out + len) % INMEM_CON_OUT_LEN;
+			if (len < req)
+				goto bail;
+		}
+		if (con_out < con_in) {
+			unlock(&con_lock);
+			len = con_driver->write(con_buf + con_out,
+						con_in - con_out);
+			lock(&con_lock);
+			con_out = (con_out + len) % INMEM_CON_OUT_LEN;
+		}
+	} while(more_flush);
+bail:
+	in_flush = false;
+	return con_out != con_in;
+}
+
+bool flush_console(void)
+{
+	bool ret;
+
+	lock(&con_lock);
+	ret = __flush_console();
+	unlock(&con_lock);
+
+	return ret;
+}
+
+static void inmem_write(char c)
+{
+	uint32_t opos;
+
+	if (!c)
+		return;
+	con_buf[con_in++] = c;
+	if (con_in >= INMEM_CON_OUT_LEN) {
+		con_in = 0;
+		con_wrapped = true;
+	}
+
+	/*
+	 * We must always re-generate memcons.out_pos because
+	 * under some circumstances, the console script will
+	 * use a broken putmemproc that does RMW on the full
+	 * 8 bytes containing out_pos and in_prod, thus corrupting
+	 * out_pos
+	 */
+	opos = con_in;
+	if (con_wrapped)
+		opos |= MEMCONS_OUT_POS_WRAP;
+	lwsync();
+	memcons.out_pos = opos;
+
+	/* If head reaches tail, push tail around & drop chars */
+	if (con_in == con_out)
+		con_out = (con_in + 1) % INMEM_CON_OUT_LEN;
+}
+
+static size_t inmem_read(char *buf, size_t req)
+{
+	size_t read = 0;
+	char *ibuf = (char *)memcons.ibuf_phys;
+
+	while (req && memcons.in_prod != memcons.in_cons) {
+		*(buf++) = ibuf[memcons.in_cons];
+		lwsync();
+		memcons.in_cons = (memcons.in_cons + 1) % INMEM_CON_IN_LEN;
+		req--;
+		read++;
+	}
+	return read;
+}
+
+static void write_char(char c)
+{
+	mambo_write(&c, 1);
+	inmem_write(c);
+}
+
+ssize_t write(int fd __unused, const void *buf, size_t count)
+{
+	/* We use recursive locking here as we can get called
+	 * from fairly deep debug path
+	 */
+	bool need_unlock = lock_recursive(&con_lock);
+	const char *cbuf = buf;
+	size_t todo = count;
+
+	while(todo--) {
+		char c = *(cbuf++);
+		if (c == 10)
+			write_char(13);
+		write_char(c);
+	}
+
+	__flush_console();
+
+	if (need_unlock)
+		unlock(&con_lock);
+
+	return count;
+}
+
+ssize_t read(int fd __unused, void *buf, size_t req_count)
+{
+	bool need_unlock = lock_recursive(&con_lock);
+	size_t count = 0;
+
+	if (con_driver && con_driver->read)
+		count = con_driver->read(buf, req_count);
+	if (!count)
+		count = inmem_read(buf, req_count);
+	if (need_unlock)
+		unlock(&con_lock);
+	return count;
+}
+
+void set_console(struct con_ops *driver)
+{
+	con_driver = driver;
+	if (driver)
+		flush_console();
+}
+
+void memcons_add_properties(void)
+{
+	uint64_t addr = (u64)&memcons;
+
+	dt_add_property_cells(opal_node, "ibm,opal-memcons",
+			      hi32(addr), lo32(addr));
+}
+
+/*
+ * Default OPAL console provided if nothing else overrides it
+ */
+static int64_t dummy_console_write(int64_t term_number, int64_t *length,
+				   const uint8_t *buffer)
+{
+	if (term_number != 0)
+		return OPAL_PARAMETER;
+	write(0, buffer, *length);
+
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_CONSOLE_WRITE, dummy_console_write, 3);
+
+static int64_t dummy_console_write_buffer_space(int64_t term_number,
+						int64_t *length)
+{
+	if (term_number != 0)
+		return OPAL_PARAMETER;
+	if (length)
+		*length = INMEM_CON_OUT_LEN;
+
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_CONSOLE_WRITE_BUFFER_SPACE, dummy_console_write_buffer_space, 2);
+
+static int64_t dummy_console_read(int64_t term_number, int64_t *length,
+				  uint8_t
*buffer) +{ + if (term_number != 0) + return OPAL_PARAMETER; + *length = read(0, buffer, *length); + + return OPAL_SUCCESS; +} +opal_call(OPAL_CONSOLE_READ, dummy_console_read, 3); + +static void dummy_console_poll(void *data __unused) +{ + bool uart_has_data; + + lock(&con_lock); + uart_has_data = uart_console_poll(); + + if (uart_has_data || memcons.in_prod != memcons.in_cons) + opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, + OPAL_EVENT_CONSOLE_INPUT); + else + opal_update_pending_evt(OPAL_EVENT_CONSOLE_INPUT, 0); + unlock(&con_lock); + +} + +void dummy_console_add_nodes(void) +{ + struct dt_node *con, *consoles; + + consoles = dt_new(opal_node, "consoles"); + assert(consoles); + dt_add_property_cells(consoles, "#address-cells", 1); + dt_add_property_cells(consoles, "#size-cells", 0); + + con = dt_new_addr(consoles, "serial", 0); + assert(con); + dt_add_property_string(con, "compatible", "ibm,opal-console-raw"); + dt_add_property_cells(con, "#write-buffer-size", INMEM_CON_OUT_LEN); + dt_add_property_cells(con, "reg", 0); + dt_add_property_string(con, "device_type", "serial"); + + dt_add_property_string(dt_chosen, "linux,stdout-path", + "/ibm,opal/consoles/serial@0"); + + opal_add_poller(dummy_console_poll, NULL); +} diff --git a/core/cpu.c b/core/cpu.c new file mode 100644 index 0000000..0eea946 --- /dev/null +++ b/core/cpu.c @@ -0,0 +1,672 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * TODO: Index array by PIR to be able to catch them easily + * from assembly such as machine checks etc... + */ +#include <skiboot.h> +#include <cpu.h> +#include <fsp.h> +#include <device.h> +#include <opal.h> +#include <stack.h> +#include <trace.h> +#include <affinity.h> +#include <chip.h> +#include <timebase.h> +#include <ccan/str/str.h> +#include <ccan/container_of/container_of.h> + +/* The cpu_threads array is static and indexed by PIR in + * order to speed up lookup from asm entry points + */ +struct cpu_stack { + union { + uint8_t stack[STACK_SIZE]; + struct cpu_thread cpu; + }; +} __align(STACK_SIZE); + +static struct cpu_stack *cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE; +unsigned int cpu_thread_count; +unsigned int cpu_max_pir; +struct cpu_thread *boot_cpu; +static struct lock reinit_lock = LOCK_UNLOCKED; + +unsigned long cpu_secondary_start __force_data = 0; + +struct cpu_job { + struct list_node link; + void (*func)(void *data); + void *data; + bool complete; + bool no_return; +}; + +/* attribute const as cpu_stacks is constant. 
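+ *
+ * Stack layout sketch for a given PIR n (editor's note derived from
+ * the code below, not part of the original patch):
+ *
+ *	&cpu_stacks[n]		struct cpu_thread lives here (union)
+ *	cpu_stack_bottom(n)	lowest valid stack pointer
+ *	cpu_stack_top(n)	top of the MC stack, STACK_TOP_GAP below
+ *				the end of the STACK_SIZE block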
*/ +void __attrconst *cpu_stack_bottom(unsigned int pir) +{ + return (void *)&cpu_stacks[pir] + sizeof(struct cpu_thread); +} + +void __attrconst *cpu_stack_top(unsigned int pir) +{ + /* This is the top of the MC stack which is above the normal + * stack, which means a SP between cpu_stack_bottom() and + * cpu_stack_top() can either be a normal stack pointer or + * a Machine Check stack pointer + */ + return (void *)&cpu_stacks[pir] + STACK_SIZE - STACK_TOP_GAP; +} + +struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu, + void (*func)(void *data), void *data, + bool no_return) +{ + struct cpu_job *job; + + if (!cpu_is_available(cpu)) { + prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n", + cpu->pir); + return NULL; + } + + job = zalloc(sizeof(struct cpu_job)); + if (!job) + return NULL; + job->func = func; + job->data = data; + job->complete = false; + job->no_return = no_return; + + if (cpu != this_cpu()) { + lock(&cpu->job_lock); + list_add_tail(&cpu->job_queue, &job->link); + unlock(&cpu->job_lock); + } else { + func(data); + job->complete = true; + } + + /* XXX Add poking of CPU with interrupt */ + + return job; +} + +bool cpu_poll_job(struct cpu_job *job) +{ + lwsync(); + return job->complete; +} + +void cpu_wait_job(struct cpu_job *job, bool free_it) +{ + if (!job) + return; + + while(!job->complete) { + /* Handle mbox if master CPU */ + if (this_cpu() == boot_cpu) + fsp_poll(); + else + smt_low(); + lwsync(); + } + lwsync(); + smt_medium(); + + if (free_it) + free(job); +} + +void cpu_free_job(struct cpu_job *job) +{ + if (!job) + return; + + assert(job->complete); + free(job); +} + +void cpu_process_jobs(void) +{ + struct cpu_thread *cpu = this_cpu(); + struct cpu_job *job; + void (*func)(void *); + void *data; + + sync(); + if (list_empty(&cpu->job_queue)) + return; + + lock(&cpu->job_lock); + while (true) { + bool no_return; + + if (list_empty(&cpu->job_queue)) + break; + smt_medium(); + job = list_pop(&cpu->job_queue, struct cpu_job, link); + if (!job) + break; + func = job->func; + data = job->data; + no_return = job->no_return; + unlock(&cpu->job_lock); + if (no_return) + free(job); + func(data); + lock(&cpu->job_lock); + if (!no_return) { + lwsync(); + job->complete = true; + } + } + unlock(&cpu->job_lock); +} + +struct dt_node *get_cpu_node(u32 pir) +{ + struct cpu_thread *t = find_cpu_by_pir(pir); + + return t ? 
t->node : NULL; +} + +/* This only covers primary, active cpus */ +struct cpu_thread *find_cpu_by_chip_id(u32 chip_id) +{ + struct cpu_thread *t; + + for_each_available_cpu(t) { + if (t->is_secondary) + continue; + if (t->chip_id == chip_id) + return t; + } + return NULL; +} + +struct cpu_thread *find_cpu_by_node(struct dt_node *cpu) +{ + struct cpu_thread *t; + + for_each_available_cpu(t) { + if (t->node == cpu) + return t; + } + return NULL; +} + +struct cpu_thread *find_cpu_by_pir(u32 pir) +{ + if (pir > cpu_max_pir) + return NULL; + return &cpu_stacks[pir].cpu; +} + +struct cpu_thread *find_cpu_by_server(u32 server_no) +{ + struct cpu_thread *t; + + for_each_cpu(t) { + if (t->server_no == server_no) + return t; + } + return NULL; +} + +struct cpu_thread *next_cpu(struct cpu_thread *cpu) +{ + struct cpu_stack *s = container_of(cpu, struct cpu_stack, cpu); + unsigned int index; + + if (cpu == NULL) + index = 0; + else + index = s - cpu_stacks + 1; + for (; index <= cpu_max_pir; index++) { + cpu = &cpu_stacks[index].cpu; + if (cpu->state != cpu_state_no_cpu) + return cpu; + } + return NULL; +} + +struct cpu_thread *first_cpu(void) +{ + return next_cpu(NULL); +} + +struct cpu_thread *next_available_cpu(struct cpu_thread *cpu) +{ + do { + cpu = next_cpu(cpu); + } while(cpu && !cpu_is_available(cpu)); + + return cpu; +} + +struct cpu_thread *first_available_cpu(void) +{ + return next_available_cpu(NULL); +} + +struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core, + u32 chip_id) +{ + do { + core = next_cpu(core); + } while(core && (!cpu_is_available(core) || + core->chip_id != chip_id || + core->is_secondary)); + return core; +} + +struct cpu_thread *first_available_core_in_chip(u32 chip_id) +{ + return next_available_core_in_chip(NULL, chip_id); +} + +uint32_t cpu_get_core_index(struct cpu_thread *cpu) +{ + return pir_to_core_id(cpu->pir); +} + +void cpu_remove_node(const struct cpu_thread *t) +{ + struct dt_node *i; + + /* Find this cpu node */ + dt_for_each_node(dt_root, i) { + const struct dt_property *p; + + if (!dt_has_node_property(i, "device_type", "cpu")) + continue; + p = dt_find_property(i, "ibm,pir"); + if (dt_property_get_cell(p, 0) == t->pir) { + dt_free(i); + return; + } + } + prerror("CPU: Could not find cpu node %i to remove!\n", t->pir); + abort(); +} + +void cpu_disable_all_threads(struct cpu_thread *cpu) +{ + unsigned int i; + + for (i = 0; i <= cpu_max_pir; i++) { + struct cpu_thread *t = &cpu_stacks[i].cpu; + + if (t->primary == cpu->primary) + t->state = cpu_state_disabled; + } + + /* XXX Do something to actually stop the core */ +} + +static void init_cpu_thread(struct cpu_thread *t, + enum cpu_thread_state state, + unsigned int pir) +{ + init_lock(&t->job_lock); + list_head_init(&t->job_queue); + t->state = state; + t->pir = pir; + assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks); +} + +void pre_init_boot_cpu(void) +{ + struct cpu_thread *cpu = this_cpu(); + + memset(cpu, 0, sizeof(struct cpu_thread)); +} + +void init_boot_cpu(void) +{ + unsigned int i, pir, pvr; + + pir = mfspr(SPR_PIR); + pvr = mfspr(SPR_PVR); + + /* Get a CPU thread count and an initial max PIR based on PVR */ + switch(PVR_TYPE(pvr)) { + case PVR_TYPE_P7: + case PVR_TYPE_P7P: + cpu_thread_count = 4; + cpu_max_pir = SPR_PIR_P7_MASK; + proc_gen = proc_gen_p7; + printf("CPU: P7 generation processor\n"); + break; + case PVR_TYPE_P8E: + case PVR_TYPE_P8: + cpu_thread_count = 8; + cpu_max_pir = SPR_PIR_P8_MASK; + proc_gen = proc_gen_p8; + printf("CPU: P8 generation 
processor\n"); + break; + default: + prerror("CPU: Unknown PVR, assuming 1 thread\n"); + cpu_thread_count = 1; + cpu_max_pir = mfspr(SPR_PIR); + proc_gen = proc_gen_unknown; + } + + printf("CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n", pir, pvr); + printf("CPU: Initial max PIR set to 0x%x\n", cpu_max_pir); + printf("CPU: Assuming max %d threads per core\n", cpu_thread_count); + + /* Clear the CPU structs */ + for (i = 0; i <= cpu_max_pir; i++) + memset(&cpu_stacks[i].cpu, 0, sizeof(struct cpu_thread)); + + /* Setup boot CPU state */ + boot_cpu = &cpu_stacks[pir].cpu; + init_cpu_thread(boot_cpu, cpu_state_active, pir); + init_boot_tracebuf(boot_cpu); + assert(this_cpu() == boot_cpu); +} + +void init_all_cpus(void) +{ + struct dt_node *cpus, *cpu; + unsigned int thread, new_max_pir = 0; + + cpus = dt_find_by_path(dt_root, "/cpus"); + assert(cpus); + + /* Iterate all CPUs in the device-tree */ + dt_for_each_child(cpus, cpu) { + unsigned int pir, server_no, chip_id; + enum cpu_thread_state state; + const struct dt_property *p; + struct cpu_thread *t, *pt; + + /* Skip cache nodes */ + if (strcmp(dt_prop_get(cpu, "device_type"), "cpu")) + continue; + + server_no = dt_prop_get_u32(cpu, "reg"); + + /* If PIR property is absent, assume it's the same as the + * server number + */ + pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no); + + /* We should always have an ibm,chip-id property */ + chip_id = dt_get_chip_id(cpu); + + /* Only use operational CPUs */ + if (!strcmp(dt_prop_get(cpu, "status"), "okay")) + state = cpu_state_present; + else + state = cpu_state_unavailable; + + printf("CPU: CPU from DT PIR=0x%04x Server#=0x%x State=%d\n", + pir, server_no, state); + + /* Setup thread 0 */ + t = pt = &cpu_stacks[pir].cpu; + if (t != boot_cpu) { + init_cpu_thread(t, state, pir); + /* Each cpu gets its own later in init_trace_buffers */ + t->trace = boot_cpu->trace; + } + t->server_no = server_no; + t->primary = t; + t->node = cpu; + t->chip_id = chip_id; + t->icp_regs = 0; /* Will be set later */ + + /* Add associativity properties */ + add_core_associativity(t); + + /* Adjust max PIR */ + if (new_max_pir < (pir + cpu_thread_count - 1)) + new_max_pir = pir + cpu_thread_count - 1; + + /* Iterate threads */ + p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s"); + if (!p) + continue; + for (thread = 1; thread < (p->len / 4); thread++) { + printf("CPU: secondary thread %d found\n", thread); + t = &cpu_stacks[pir + thread].cpu; + init_cpu_thread(t, state, pir + thread); + t->trace = boot_cpu->trace; + t->server_no = ((const u32 *)p->prop)[thread]; + t->is_secondary = true; + t->primary = pt; + t->node = cpu; + t->chip_id = chip_id; + } + } + cpu_max_pir = new_max_pir; + printf("CPU: New max PIR set to 0x%x\n", new_max_pir); +} + +void cpu_bringup(void) +{ + struct cpu_thread *t; + + printf("CPU: Setting up secondary CPU state\n"); + + op_display(OP_LOG, OP_MOD_CPU, 0x0000); + + /* Tell everybody to chime in ! */ + printf("CPU: Calling in all processors...\n"); + cpu_secondary_start = 1; + sync(); + + op_display(OP_LOG, OP_MOD_CPU, 0x0002); + + for_each_cpu(t) { + if (t->state != cpu_state_present && + t->state != cpu_state_active) + continue; + + /* Add a callin timeout ? If so, call cpu_remove_node(t). 
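+		 *
+		 * A possible sketch (editor's note; assumes the mftb()
+		 * and tb_to_msecs() helpers from <timebase.h>):
+		 *
+		 *	uint64_t tb0 = mftb();
+		 *
+		 *	while (t->state != cpu_state_active) {
+		 *		if (tb_to_msecs(mftb() - tb0) > 1000) {
+		 *			cpu_remove_node(t);
+		 *			break;
+		 *		}
+		 *		smt_very_low();
+		 *		sync();
+		 *	}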
*/ + while (t->state != cpu_state_active) { + smt_very_low(); + sync(); + } + smt_medium(); + } + + printf("CPU: All processors called in...\n"); + + op_display(OP_LOG, OP_MOD_CPU, 0x0003); +} + +void cpu_callin(struct cpu_thread *cpu) +{ + cpu->state = cpu_state_active; +} + +static void opal_start_thread_job(void *data) +{ + cpu_give_self_os(); + + /* We do not return, so let's mark the job as + * complete + */ + start_kernel_secondary((uint64_t)data); +} + +static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address) +{ + struct cpu_thread *cpu; + struct cpu_job *job; + + cpu = find_cpu_by_server(server_no); + if (!cpu) { + prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no); + return OPAL_PARAMETER; + } + printf("OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n", + server_no, cpu->pir, start_address); + + lock(&reinit_lock); + if (!cpu_is_available(cpu)) { + unlock(&reinit_lock); + prerror("OPAL: CPU not active in OPAL !\n"); + return OPAL_WRONG_STATE; + } + job = __cpu_queue_job(cpu, opal_start_thread_job, (void *)start_address, + true); + unlock(&reinit_lock); + if (!job) { + prerror("OPAL: Failed to create CPU start job !\n"); + return OPAL_INTERNAL_ERROR; + } + return OPAL_SUCCESS; +} +opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2); + +static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status) +{ + struct cpu_thread *cpu; + + cpu = find_cpu_by_server(server_no); + if (!cpu) { + prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no); + return OPAL_PARAMETER; + } + if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) { + prerror("OPAL: CPU not active in OPAL nor OS !\n"); + return OPAL_PARAMETER; + } + switch(cpu->state) { + case cpu_state_os: + *thread_status = OPAL_THREAD_STARTED; + break; + case cpu_state_active: + /* Active in skiboot -> inactive in OS */ + *thread_status = OPAL_THREAD_INACTIVE; + break; + default: + *thread_status = OPAL_THREAD_UNAVAILABLE; + } + + return OPAL_SUCCESS; +} +opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2); + +static int64_t opal_return_cpu(void) +{ + printf("OPAL: Returning CPU 0x%04x\n", this_cpu()->pir); + + __secondary_cpu_entry(); + + return OPAL_HARDWARE; /* Should not happen */ +} +opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0); + +static void cpu_change_hile(void *hilep) +{ + bool hile = *(bool *)hilep; + unsigned long hid0; + + hid0 = mfspr(SPR_HID0); + if (hile) + hid0 |= SPR_HID0_HILE; + else + hid0 &= ~SPR_HID0_HILE; + printf("CPU: [%08x] HID0 set to 0x%016lx\n", this_cpu()->pir, hid0); + set_hid0(hid0); + + this_cpu()->current_hile = hile; +} + +static int64_t cpu_change_all_hile(bool hile) +{ + struct cpu_thread *cpu; + + printf("CPU: Switching HILE on all CPUs to %d\n", hile); + + for_each_available_cpu(cpu) { + if (cpu->current_hile == hile) + continue; + if (cpu == this_cpu()) { + cpu_change_hile(&hile); + continue; + } + cpu_wait_job(cpu_queue_job(cpu, cpu_change_hile, &hile), true); + } + return OPAL_SUCCESS; +} + +static int64_t opal_reinit_cpus(uint64_t flags) +{ + struct cpu_thread *cpu; + int64_t rc = OPAL_SUCCESS; + int i; + + lock(&reinit_lock); + + prerror("OPAL: Trying a CPU re-init with flags: 0x%llx\n", flags); + + for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) { + if (cpu == this_cpu()) + continue; + if (cpu->state == cpu_state_os) { + /* + * That might be a race with return CPU during kexec + * where we are still, wait a bit and try again + */ + for (i = 0; (i < 3) && (cpu->state == cpu_state_os); i++) + time_wait_ms(1); + if (cpu->state 
== cpu_state_os) { + prerror("OPAL: CPU 0x%x not in OPAL !\n", cpu->pir); + rc = OPAL_WRONG_STATE; + goto bail; + } + } + } + /* + * Now we need to mark ourselves "active" or we'll be skipped + * by the various "for_each_active_..." calls done by slw_reinit() + */ + this_cpu()->state = cpu_state_active; + + /* + * If the flags affect endianness and we are on P8 DD2 or later, then + * use the HID bit. We use the PVR (we could use the EC level in + * the chip but the PVR is more readily available). + */ + if (proc_gen == proc_gen_p8 && PVR_VERS_MAJ(mfspr(SPR_PVR)) >= 2 && + (flags & (OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE))) { + bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE); + + flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE); + rc = cpu_change_all_hile(hile); + } + + /* Any flags left ? */ + if (flags != 0) + rc = slw_reinit(flags); + + /* And undo the above */ + this_cpu()->state = cpu_state_os; + +bail: + unlock(&reinit_lock); + return rc; +} +opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1); diff --git a/core/device.c b/core/device.c new file mode 100644 index 0000000..28cccb7 --- /dev/null +++ b/core/device.c @@ -0,0 +1,791 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <device.h> +#include <stdlib.h> +#include <skiboot.h> +#include <libfdt/libfdt.h> +#include <libfdt/libfdt_internal.h> +#include <ccan/str/str.h> +#include <ccan/endian/endian.h> + +/* Used to give unique handles. */ +u32 last_phandle = 0; + +struct dt_node *dt_root; +struct dt_node *dt_chosen; + +static const char *take_name(const char *name) +{ + if (!is_rodata(name) && !(name = strdup(name))) { + prerror("Failed to allocate copy of name"); + abort(); + } + return name; +} + +static void free_name(const char *name) +{ + if (!is_rodata(name)) + free((char *)name); +} + +static struct dt_node *new_node(const char *name) +{ + struct dt_node *node = malloc(sizeof *node); + if (!node) { + prerror("Failed to allocate node\n"); + abort(); + } + + node->name = take_name(name); + node->parent = NULL; + list_head_init(&node->properties); + list_head_init(&node->children); + /* FIXME: locking? 
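+	 * Editor's note expanding on the FIXME: two dt_new() callers
+	 * on different CPUs could race on ++last_phandle and hand out
+	 * duplicate phandles; guarding the increment with a lock (or
+	 * an atomic add) would close that hole.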
*/ + node->phandle = ++last_phandle; + return node; +} + +struct dt_node *dt_new_root(const char *name) +{ + return new_node(name); +} + +bool dt_attach_root(struct dt_node *parent, struct dt_node *root) +{ + struct dt_node *node; + + /* Look for duplicates */ + + assert(!root->parent); + dt_for_each_child(parent, node) { + if (!strcmp(node->name, root->name)) { + prerror("DT: %s failed, duplicate %s\n", + __func__, root->name); + return false; + } + } + list_add_tail(&parent->children, &root->list); + root->parent = parent; + + return true; +} + +struct dt_node *dt_new(struct dt_node *parent, const char *name) +{ + struct dt_node *new; + assert(parent); + + new = new_node(name); + if (!dt_attach_root(parent, new)) { + free_name(new->name); + free(new); + return NULL; + } + return new; +} + +struct dt_node *dt_new_addr(struct dt_node *parent, const char *name, + uint64_t addr) +{ + char *lname; + struct dt_node *new; + size_t len; + + assert(parent); + len = strlen(name) + STR_MAX_CHARS(addr) + 2; + lname = malloc(len); + if (!lname) + return NULL; + snprintf(lname, len, "%s@%llx", name, (long long)addr); + new = new_node(lname); + free(lname); + if (!dt_attach_root(parent, new)) { + free_name(new->name); + free(new); + return NULL; + } + return new; +} + +struct dt_node *dt_new_2addr(struct dt_node *parent, const char *name, + uint64_t addr0, uint64_t addr1) +{ + char *lname; + struct dt_node *new; + size_t len; + assert(parent); + + len = strlen(name) + 2*STR_MAX_CHARS(addr0) + 3; + lname = malloc(len); + if (!lname) + return NULL; + snprintf(lname, len, "%s@%llx,%llx", + name, (long long)addr0, (long long)addr1); + new = new_node(lname); + free(lname); + if (!dt_attach_root(parent, new)) { + free_name(new->name); + free(new); + return NULL; + } + return new; +} + +char *dt_get_path(const struct dt_node *node) +{ + unsigned int len = 0; + const struct dt_node *n; + char *path, *p; + + /* Dealing with NULL is for test/debug purposes */ + if (!node) + return strdup("<NULL>"); + + for (n = node; n; n = n->parent) { + len += strlen(n->name); + if (n->parent || n == node) + len++; + } + path = zalloc(len + 1); + assert(path); + p = path + len; + for (n = node; n; n = n->parent) { + len = strlen(n->name); + p -= len; + memcpy(p, n->name, len); + if (n->parent || n == node) + *(--p) = '/'; + } + assert(p == path); + + return p; +} + +static const char *__dt_path_split(const char *p, + const char **namep, unsigned int *namel, + const char **addrp, unsigned int *addrl) +{ + const char *at, *sl; + + *namel = *addrl = 0; + + /* Skip initial '/' */ + while (*p == '/') + p++; + + /* Check empty path */ + if (*p == 0) + return p; + + at = strchr(p, '@'); + sl = strchr(p, '/'); + if (sl == NULL) + sl = p + strlen(p); + if (sl < at) + at = NULL; + if (at) { + *addrp = at + 1; + *addrl = sl - at - 1; + } + *namep = p; + *namel = at ? 
(at - p) : (sl - p); + + return sl; +} + +struct dt_node *dt_find_by_path(struct dt_node *root, const char *path) +{ + struct dt_node *n; + const char *pn, *pa, *p = path, *nn, *na; + unsigned int pnl, pal, nnl, nal; + bool match; + + /* Walk path components */ + while (*p) { + /* Extract next path component */ + p = __dt_path_split(p, &pn, &pnl, &pa, &pal); + if (pnl == 0 && pal == 0) + break; + + /* Compare with each child node */ + match = false; + list_for_each(&root->children, n, list) { + match = true; + __dt_path_split(n->name, &nn, &nnl, &na, &nal); + if (pnl && (pnl != nnl || strncmp(pn, nn, pnl))) + match = false; + if (pal && (pal != nal || strncmp(pa, na, pal))) + match = false; + if (match) { + root = n; + break; + } + } + + /* No child match */ + if (!match) + return NULL; + } + return root; +} + +struct dt_node *dt_find_by_phandle(struct dt_node *root, u32 phandle) +{ + struct dt_node *node; + + dt_for_each_node(root, node) + if (node->phandle == phandle) + return node; + return NULL; +} + +static struct dt_property *new_property(struct dt_node *node, + const char *name, size_t size) +{ + struct dt_property *p = malloc(sizeof(*p) + size); + if (!p) { + prerror("Failed to allocate property \"%s\" for %s of %zu bytes\n", + name, dt_get_path(node), size); + abort(); + } + if (dt_find_property(node, name)) { + prerror("Duplicate property \"%s\" in node %s\n", + name, dt_get_path(node)); + abort(); + + } + + p->name = take_name(name); + p->len = size; + list_add_tail(&node->properties, &p->list); + return p; +} + +struct dt_property *dt_add_property(struct dt_node *node, + const char *name, + const void *val, size_t size) +{ + struct dt_property *p; + + /* + * Filter out phandle properties, we re-generate them + * when flattening + */ + if (strcmp(name, "linux,phandle") == 0 || + strcmp(name, "phandle") == 0) { + assert(size == 4); + node->phandle = *(const u32 *)val; + if (node->phandle >= last_phandle) + last_phandle = node->phandle; + return NULL; + } + + p = new_property(node, name, size); + if (size) + memcpy(p->prop, val, size); + return p; +} + +void dt_resize_property(struct dt_property **prop, size_t len) +{ + size_t new_len = sizeof(**prop) + len; + + *prop = realloc(*prop, new_len); + + /* Fix up linked lists in case we moved. (note: not an empty list). */ + (*prop)->list.next->prev = &(*prop)->list; + (*prop)->list.prev->next = &(*prop)->list; +} + +struct dt_property *dt_add_property_string(struct dt_node *node, + const char *name, + const char *value) +{ + return dt_add_property(node, name, value, strlen(value)+1); +} + +struct dt_property *dt_add_property_nstr(struct dt_node *node, + const char *name, + const char *value, unsigned int vlen) +{ + struct dt_property *p; + char *tmp = zalloc(vlen + 1); + + strncpy(tmp, value, vlen); + p = dt_add_property(node, name, tmp, strlen(tmp)+1); + free(tmp); + + return p; +} + +struct dt_property *__dt_add_property_cells(struct dt_node *node, + const char *name, + int count, ...) +{ + struct dt_property *p; + u32 *val; + unsigned int i; + va_list args; + + p = new_property(node, name, count * sizeof(u32)); + val = (u32 *)p->prop; + va_start(args, count); + for (i = 0; i < count; i++) + val[i] = cpu_to_fdt32(va_arg(args, u32)); + va_end(args); + return p; +} + +struct dt_property *__dt_add_property_u64s(struct dt_node *node, + const char *name, + int count, ...) 
+{ + struct dt_property *p; + u64 *val; + unsigned int i; + va_list args; + + p = new_property(node, name, count * sizeof(u64)); + val = (u64 *)p->prop; + va_start(args, count); + for (i = 0; i < count; i++) + val[i] = cpu_to_fdt64(va_arg(args, u64)); + va_end(args); + return p; +} + +struct dt_property *__dt_add_property_strings(struct dt_node *node, + const char *name, + int count, ...) +{ + struct dt_property *p; + unsigned int i, size; + va_list args; + const char *sstr; + char *s; + + va_start(args, count); + for (i = size = 0; i < count; i++) { + sstr = va_arg(args, const char *); + if (sstr) + size += strlen(sstr) + 1; + } + va_end(args); + if (!size) + size = 1; + p = new_property(node, name, size); + s = (char *)p->prop; + *s = 0; + va_start(args, count); + for (i = 0; i < count; i++) { + sstr = va_arg(args, const char *); + if (sstr) { + strcpy(s, sstr); + s = s + strlen(sstr) + 1; + } + } + va_end(args); + return p; +} + +void dt_del_property(struct dt_node *node, struct dt_property *prop) +{ + list_del_from(&node->properties, &prop->list); + free_name(prop->name); + free(prop); +} + +u32 dt_property_get_cell(const struct dt_property *prop, u32 index) +{ + assert(prop->len >= (index+1)*sizeof(u32)); + /* Always aligned, so this works. */ + return fdt32_to_cpu(((const u32 *)prop->prop)[index]); +} + +/* First child of this node. */ +struct dt_node *dt_first(const struct dt_node *root) +{ + return list_top(&root->children, struct dt_node, list); +} + +/* Return next node, or NULL. */ +struct dt_node *dt_next(const struct dt_node *root, + const struct dt_node *prev) +{ + /* Children? */ + if (!list_empty(&prev->children)) + return dt_first(prev); + + do { + /* More siblings? */ + if (prev->list.next != &prev->parent->children.n) + return list_entry(prev->list.next, struct dt_node,list); + + /* No more siblings, move up to parent. 
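+		 *
+		 * The overall effect is pre-order traversal (editor's
+		 * example, not in the original patch): for a root with
+		 * children A and B, where A has child A1, successive
+		 * dt_next() calls starting from dt_first(root) return
+		 * A, A1, B, then NULL.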
*/ + prev = prev->parent; + } while (prev != root); + + return NULL; +} + +struct dt_property *__dt_find_property(struct dt_node *node, const char *name) +{ + struct dt_property *i; + + list_for_each(&node->properties, i, list) + if (strcmp(i->name, name) == 0) + return i; + return NULL; +} + +const struct dt_property *dt_find_property(const struct dt_node *node, + const char *name) +{ + const struct dt_property *i; + + list_for_each(&node->properties, i, list) + if (strcmp(i->name, name) == 0) + return i; + return NULL; +} + +const struct dt_property *dt_require_property(const struct dt_node *node, + const char *name, int wanted_len) +{ + const struct dt_property *p = dt_find_property(node, name); + + if (!p) { + const char *path = dt_get_path(node); + + prerror("DT: Missing required property %s/%s\n", + path, name); + assert(false); + } + if (wanted_len >= 0 && p->len != wanted_len) { + const char *path = dt_get_path(node); + + prerror("DT: Unexpected property length %s/%s\n", + path, name); + prerror("DT: Expected len: %d got len: %zu\n", + wanted_len, p->len); + assert(false); + } + + return p; +} + +bool dt_has_node_property(const struct dt_node *node, + const char *name, const char *val) +{ + const struct dt_property *p = dt_find_property(node, name); + + if (!p) + return false; + if (!val) + return true; + + return p->len == strlen(val) + 1 && memcmp(p->prop, val, p->len) == 0; +} + +bool dt_prop_find_string(const struct dt_property *p, const char *s) +{ + const char *c, *end; + + if (!p) + return false; + c = p->prop; + end = c + p->len; + + while(c < end) { + if (!strcasecmp(s, c)) + return true; + c += strlen(c) + 1; + } + return false; +} + +bool dt_node_is_compatible(const struct dt_node *node, const char *compat) +{ + const struct dt_property *p = dt_find_property(node, "compatible"); + + return dt_prop_find_string(p, compat); +} + +struct dt_node *dt_find_compatible_node(struct dt_node *root, + struct dt_node *prev, + const char *compat) +{ + struct dt_node *node; + + node = prev ? dt_next(root, prev) : root; + for (; node; node = dt_next(root, node)) + if (dt_node_is_compatible(node, compat)) + return node; + return NULL; +} + +u64 dt_prop_get_u64(const struct dt_node *node, const char *prop) +{ + const struct dt_property *p = dt_require_property(node, prop, 8); + + return ((u64)dt_property_get_cell(p, 0) << 32) + | dt_property_get_cell(p, 1); +} + +u64 dt_prop_get_u64_def(const struct dt_node *node, const char *prop, u64 def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + if (!p) + return def; + + return ((u64)dt_property_get_cell(p, 0) << 32) + | dt_property_get_cell(p, 1); +} + +u32 dt_prop_get_u32(const struct dt_node *node, const char *prop) +{ + const struct dt_property *p = dt_require_property(node, prop, 4); + + return dt_property_get_cell(p, 0); +} + +u32 dt_prop_get_u32_def(const struct dt_node *node, const char *prop, u32 def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + if (!p) + return def; + + return dt_property_get_cell(p, 0); +} + +const void *dt_prop_get(const struct dt_node *node, const char *prop) +{ + const struct dt_property *p = dt_require_property(node, prop, -1); + + return p->prop; +} + +const void *dt_prop_get_def(const struct dt_node *node, const char *prop, + void *def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + return p ? 
p->prop : def; +} + +const void *dt_prop_get_def_size(const struct dt_node *node, const char *prop, + void *def, size_t *len) +{ + const struct dt_property *p = dt_find_property(node, prop); + *len = 0; + if (p) + *len = p->len; + + return p ? p->prop : def; +} + +u32 dt_prop_get_cell(const struct dt_node *node, const char *prop, u32 cell) +{ + const struct dt_property *p = dt_require_property(node, prop, -1); + + return dt_property_get_cell(p, cell); +} + +u32 dt_prop_get_cell_def(const struct dt_node *node, const char *prop, + u32 cell, u32 def) +{ + const struct dt_property *p = dt_find_property(node, prop); + + if (!p) + return def; + + return dt_property_get_cell(p, cell); +} + +void dt_free(struct dt_node *node) +{ + struct dt_node *child; + struct dt_property *p; + + while ((child = list_top(&node->children, struct dt_node, list))) + dt_free(child); + + while ((p = list_pop(&node->properties, struct dt_property, list))) { + free_name(p->name); + free(p); + } + + if (node->parent) + list_del_from(&node->parent->children, &node->list); + free_name(node->name); + free(node); +} + +int dt_expand_node(struct dt_node *node, const void *fdt, int fdt_node) +{ + const struct fdt_property *prop; + int offset, nextoffset, err; + struct dt_node *child; + const char *name; + uint32_t tag; + + if (((err = fdt_check_header(fdt)) != 0) + || ((err = _fdt_check_node_offset(fdt, fdt_node)) < 0)) { + prerror("FDT: Error %d parsing node 0x%x\n", err, fdt_node); + return -1; + } + + nextoffset = err; + do { + offset = nextoffset; + + tag = fdt_next_tag(fdt, offset, &nextoffset); + switch (tag) { + case FDT_PROP: + prop = _fdt_offset_ptr(fdt, offset); + name = fdt_string(fdt, fdt32_to_cpu(prop->nameoff)); + dt_add_property(node, name, prop->data, + fdt32_to_cpu(prop->len)); + break; + case FDT_BEGIN_NODE: + name = fdt_get_name(fdt, offset, NULL); + child = dt_new_root(name); + assert(child); + nextoffset = dt_expand_node(child, fdt, offset); + + /* + * This may fail in case of duplicate, keep it + * going for now, we may ultimately want to + * assert + */ + (void)dt_attach_root(node, child); + break; + case FDT_END: + return -1; + } + } while (tag != FDT_END_NODE); + + return nextoffset; +} + +void dt_expand(const void *fdt) +{ + printf("FDT: Parsing fdt @%p\n", fdt); + + dt_root = dt_new_root(""); + + dt_expand_node(dt_root, fdt, 0); +} + +u64 dt_get_number(const void *pdata, unsigned int cells) +{ + const u32 *p = pdata; + u64 ret = 0; + + while(cells--) + ret = (ret << 32) | be32_to_cpu(*(p++)); + return ret; +} + +u32 dt_n_address_cells(const struct dt_node *node) +{ + if (!node->parent) + return 0; + return dt_prop_get_u32_def(node->parent, "#address-cells", 2); +} + +u32 dt_n_size_cells(const struct dt_node *node) +{ + if (!node->parent) + return 0; + return dt_prop_get_u32_def(node->parent, "#size-cells", 1); +} + +u64 dt_get_address(const struct dt_node *node, unsigned int index, + u64 *out_size) +{ + const struct dt_property *p; + u32 na = dt_n_address_cells(node); + u32 ns = dt_n_size_cells(node); + u32 pos, n; + + p = dt_require_property(node, "reg", -1); + n = (na + ns) * sizeof(u32); + pos = n * index; + assert((pos + n) <= p->len); + if (out_size) + *out_size = dt_get_number(p->prop + pos + na * sizeof(u32), ns); + return dt_get_number(p->prop + pos, na); +} + +static u32 __dt_get_chip_id(const struct dt_node *node) +{ + const struct dt_property *prop; + + for (; node; node = node->parent) { + prop = dt_find_property(node, "ibm,chip-id"); + if (prop) + return dt_property_get_cell(prop, 0); + 
} + return 0xffffffff; +} + +u32 dt_get_chip_id(const struct dt_node *node) +{ + u32 id = __dt_get_chip_id(node); + assert(id != 0xffffffff); + return id; +} + +struct dt_node *dt_find_compatible_node_on_chip(struct dt_node *root, + struct dt_node *prev, + const char *compat, + uint32_t chip_id) +{ + struct dt_node *node; + + node = prev ? dt_next(root, prev) : root; + for (; node; node = dt_next(root, node)) { + u32 cid = __dt_get_chip_id(node); + if (cid == chip_id && + dt_node_is_compatible(node, compat)) + return node; + } + return NULL; +} + +unsigned int dt_count_addresses(const struct dt_node *node) +{ + const struct dt_property *p; + u32 na = dt_n_address_cells(node); + u32 ns = dt_n_size_cells(node); + u32 n; + + p = dt_require_property(node, "reg", -1); + n = (na + ns) * sizeof(u32); + return p->len / n; +} + +u64 dt_translate_address(const struct dt_node *node, unsigned int index, + u64 *out_size) +{ + /* XXX TODO */ + return dt_get_address(node, index, out_size); +} diff --git a/core/exceptions.c b/core/exceptions.c new file mode 100644 index 0000000..995ca92 --- /dev/null +++ b/core/exceptions.c @@ -0,0 +1,529 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <stack.h> +#include <opal.h> +#include <processor.h> +#include <cpu.h> + +static uint64_t client_mc_address; + +extern uint8_t exc_primary_start; +extern uint8_t exc_primary_end; + +extern uint32_t exc_primary_patch_branch; + +extern uint8_t exc_secondary_start; +extern uint8_t exc_secondary_end; + +extern uint32_t exc_secondary_patch_stack; +extern uint32_t exc_secondary_patch_mfsrr0; +extern uint32_t exc_secondary_patch_mfsrr1; +extern uint32_t exc_secondary_patch_type; +extern uint32_t exc_secondary_patch_mtsrr0; +extern uint32_t exc_secondary_patch_mtsrr1; +extern uint32_t exc_secondary_patch_rfid; + +struct lock hmi_lock = LOCK_UNLOCKED; + +#define SRR1_MC_LOADSTORE(srr1) ((srr1) & PPC_BIT(42)) + +#define SRR1_MC_IFETCH(srr1) ((srr1) & PPC_BITMASK(43,45)) +#define SRR1_MC_IFETCH_UE (0x1 << PPC_BITLSHIFT(45)) +#define SRR1_MC_IFETCH_SLB_PARITY (0x2 << PPC_BITLSHIFT(45)) +#define SRR1_MC_IFETCH_SLB_MULTIHIT (0x3 << PPC_BITLSHIFT(45)) +#define SRR1_MC_IFETCH_SLB_BOTH (0x4 << PPC_BITLSHIFT(45)) +#define SRR1_MC_IFETCH_TLB_MULTIHIT (0x5 << PPC_BITLSHIFT(45)) +#define SRR1_MC_IFETCH_UE_TLB_RELOAD (0x6 << PPC_BITLSHIFT(45)) +#define SRR1_MC_IFETCH_UE_IFU_INTERNAL (0x7 << PPC_BITLSHIFT(45)) + +#define DSISR_MC_UE (PPC_BIT(48)) +#define DSISR_MC_UE_TABLEWALK (PPC_BIT(49)) +#define DSISR_MC_ERAT_MULTIHIT (PPC_BIT(52)) +#define DSISR_MC_TLB_MULTIHIT_MFTLB (PPC_BIT(53)) +#define DSISR_MC_TLB_MULTIHIT_MFSLB (PPC_BIT(55)) +#define DSISR_MC_TLB_MULTIHIT (PPC_BIT(53) | PPC_BIT(55)) +#define DSISR_MC_SLB_MULTIHIT (PPC_BIT(56)) +#define DSISR_MC_SLB_MULTIHIT_PARITY (PPC_BIT(57)) + +static void mce_set_ierror(struct opal_machine_check_event *mce, uint64_t srr1) +{ + switch (SRR1_MC_IFETCH(srr1)) { + case SRR1_MC_IFETCH_SLB_PARITY: + mce->error_type = 
OpalMCE_ERROR_TYPE_SLB; + mce->u.slb_error.slb_error_type = OpalMCE_SLB_ERROR_PARITY; + break; + + case SRR1_MC_IFETCH_SLB_MULTIHIT: + mce->error_type = OpalMCE_ERROR_TYPE_SLB; + mce->u.slb_error.slb_error_type = OpalMCE_SLB_ERROR_MULTIHIT; + break; + + case SRR1_MC_IFETCH_SLB_BOTH: + mce->error_type = OpalMCE_ERROR_TYPE_SLB; + mce->u.slb_error.slb_error_type = + OpalMCE_SLB_ERROR_INDETERMINATE; + break; + + case SRR1_MC_IFETCH_TLB_MULTIHIT: + mce->error_type = OpalMCE_ERROR_TYPE_TLB; + mce->u.tlb_error.tlb_error_type = OpalMCE_TLB_ERROR_MULTIHIT; + break; + + case SRR1_MC_IFETCH_UE: + case SRR1_MC_IFETCH_UE_IFU_INTERNAL: + mce->error_type = OpalMCE_ERROR_TYPE_UE; + mce->u.ue_error.ue_error_type = OpalMCE_UE_ERROR_IFETCH; + break; + + case SRR1_MC_IFETCH_UE_TLB_RELOAD: + mce->error_type = OpalMCE_ERROR_TYPE_UE; + mce->u.ue_error.ue_error_type = + OpalMCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH; + break; + } + +} + +static void mce_set_derror(struct opal_machine_check_event *mce, uint64_t dsisr) +{ + if (dsisr & DSISR_MC_UE) { + mce->error_type = OpalMCE_ERROR_TYPE_UE; + mce->u.ue_error.ue_error_type = OpalMCE_UE_ERROR_LOAD_STORE; + + } else if (dsisr & DSISR_MC_UE_TABLEWALK) { + mce->error_type = OpalMCE_ERROR_TYPE_UE; + mce->u.ue_error.ue_error_type = + OpalMCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE; + + } else if (dsisr & DSISR_MC_ERAT_MULTIHIT) { + mce->error_type = OpalMCE_ERROR_TYPE_ERAT; + mce->u.erat_error.erat_error_type = + OpalMCE_ERAT_ERROR_MULTIHIT; + + } else if (dsisr & DSISR_MC_TLB_MULTIHIT) { + mce->error_type = OpalMCE_ERROR_TYPE_TLB; + mce->u.tlb_error.tlb_error_type = + OpalMCE_TLB_ERROR_MULTIHIT; + + } else if (dsisr & DSISR_MC_SLB_MULTIHIT) { + mce->error_type = OpalMCE_ERROR_TYPE_SLB; + mce->u.slb_error.slb_error_type = + OpalMCE_SLB_ERROR_MULTIHIT; + + } else if (dsisr & DSISR_MC_SLB_MULTIHIT_PARITY) { + mce->error_type = OpalMCE_ERROR_TYPE_SLB; + mce->u.slb_error.slb_error_type = + OpalMCE_SLB_ERROR_INDETERMINATE; + } +} + +/* Called from head.S, thus no prototype */ +void handle_machine_check(struct stack_frame *stack); + +void handle_machine_check(struct stack_frame *stack) +{ + struct opal_machine_check_event *mce; + uint64_t srr1, addr; + + mce = &this_cpu()->mc_event; + + /* This will occur if we get another MC between the time that + * we re-set MSR_ME, and the OS clears this flag. + * + * However, the alternative is keeping MSR_ME cleared, and letting + * the OS re-set it (after clearing the flag). However, we + * risk a checkstop, and an opal assert() is the better option. 
+ */ + assert(!mce->in_use); + + mce->in_use = 1; + + /* Populate generic machine check info */ + mce->version = OpalMCE_V1; + mce->srr0 = stack->srr0; + mce->srr1 = stack->srr1; + mce->gpr3 = stack->gpr[3]; + + mce->initiator = OpalMCE_INITIATOR_CPU; + mce->disposition = OpalMCE_DISPOSITION_NOT_RECOVERED; + mce->severity = OpalMCE_SEV_ERROR_SYNC; + + srr1 = stack->srr1; + + /* Populate the mce error_type and type-specific error_type from either + * SRR1 or DSISR, depending whether this was a load/store or ifetch + * exception */ + if (SRR1_MC_LOADSTORE(srr1)) { + mce_set_derror(mce, srr1); + addr = stack->srr0; + } else { + mce_set_ierror(mce, mfspr(SPR_DSISR)); + addr = mfspr(SPR_DAR); + } + + if (mce->error_type == OpalMCE_ERROR_TYPE_TLB) { + mce->u.tlb_error.effective_address_provided = true; + mce->u.tlb_error.effective_address = addr; + + } else if (mce->error_type == OpalMCE_ERROR_TYPE_SLB) { + mce->u.slb_error.effective_address_provided = true; + mce->u.slb_error.effective_address = addr; + + } else if (mce->error_type == OpalMCE_ERROR_TYPE_ERAT) { + mce->u.erat_error.effective_address_provided = true; + mce->u.erat_error.effective_address = addr; + + } else if (mce->error_type == OpalMCE_ERROR_TYPE_UE) { + mce->u.ue_error.effective_address_provided = true; + mce->u.ue_error.effective_address = addr; + } + + /* Setup stack to rfi into the OS' handler, with ME re-enabled. */ + stack->gpr[3] = (uint64_t)mce; + stack->srr0 = client_mc_address; + stack->srr1 = mfmsr() | MSR_ME; +} + +#define REG "%016llx" +#define REGS_PER_LINE 4 +#define LAST_VOLATILE 13 + +static void dump_regs(struct stack_frame *stack, uint64_t hmer) +{ + int i; + uint64_t tfmr; + + if (hmer & SPR_HMER_MALFUNCTION_ALERT) + printf("HMI: malfunction Alert\n"); + if (hmer & SPR_HMER_HYP_RESOURCE_ERR) + printf("HMI: Hypervisor resource error.\n"); + if (hmer & SPR_HMER_TFAC_ERROR) { + tfmr = mfspr(SPR_TFMR); + printf("HMI: TFAC error: SPRN_TFMR = 0x%016llx\n", tfmr); + } + if (hmer & SPR_HMER_TFMR_PARITY_ERROR) { + tfmr = mfspr(SPR_TFMR); + printf("HMI: TFMR parity error: SPRN_TFMR = 0x%016llx\n", tfmr); + } + printf("TRAP: %04llx\n", stack->type); + printf("SRR0: "REG" SRR1: "REG"\n", stack->srr0, stack->srr1); + printf("CFAR: "REG" LR: "REG" CTR: "REG"\n", + stack->cfar, stack->lr, stack->ctr); + printf(" CR: %08x XER: %08x\n", stack->cr, stack->xer); + + for (i = 0; i < 32; i++) { + if ((i % REGS_PER_LINE) == 0) + printf("\nGPR%02d: ", i); + printf(REG " ", stack->gpr[i]); + if (i == LAST_VOLATILE) + break; + } + printf("\n"); +} + +/* + * HMER register layout: + * +===+==========+============================+========+===================+ + * |Bit|Name |Description |PowerKVM|Action | + * | | | |HMI | | + * | | | |enabled | | + * | | | |for this| | + * | | | |bit ? | | + * +===+==========+============================+========+===================+ + * |0 |malfunctio|A processor core in the |Yes |Raise attn from | + * | |n_allert |system has checkstopped | |sapphire resulting | + * | | |(failed recovery) and has | |xstop | + * | | |requested a CP Sparing | | | + * | | |to occur. This is | | | + * | | |broadcasted to every | | | + * | | |processor in the system | | | + * |---+----------+----------------------------+--------+-------------------| + * |1 |Reserved |reserved |n/a | | + * |---+----------+----------------------------+--------+-------------------| + * |2 |proc_recv_|Processor recovery occurred |Yes |Log message and | + * | |done |error-bit in fir not masked | |continue working. 
+ * |   |          |(see bit 11)                |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |3  |proc_recv_|Processor went through      |Yes     |Log message and    |
+ * |   |error_mask|recovery for an error which |        |continue working.  |
+ * |   |ed        |is actually masked for      |        |                   |
+ * |   |          |reporting                   |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |4  |          |Timer facility experienced  |Yes     |Raise attn from    |
+ * |   |tfac_error|an error.                   |        |sapphire resulting |
+ * |   |          |TB, DEC, HDEC, PURR or SPURR|        |in xstop           |
+ * |   |          |may be corrupted (details in|        |                   |
+ * |   |          |TFMR)                       |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |5  |          |TFMR SPR itself is          |Yes     |Raise attn from    |
+ * |   |tfmr_parit|corrupted.                  |        |sapphire resulting |
+ * |   |y_error   |Entire timing facility may  |        |in xstop           |
+ * |   |          |be compromised.             |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |6  |ha_overflo|UPS (Uninterrupted Power    |No      |N/A                |
+ * |   |w_warning |System) overflow indication |        |                   |
+ * |   |          |that the UPS                |        |                   |
+ * |   |          |DirtyAddrTable has          |        |                   |
+ * |   |          |reached a limit where it    |        |                   |
+ * |   |          |requires PHYP unload support|        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |7  |reserved  |reserved                    |n/a     |n/a                |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |8  |xscom_fail|An XSCOM operation caused by|No      |We handle it by    |
+ * |   |          |a cache inhibited load/store|        |manually reading   |
+ * |   |          |from this thread failed. A  |        |HMER register.     |
+ * |   |          |trap register is            |        |                   |
+ * |   |          |available.                  |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |9  |xscom_done|An XSCOM operation caused by|No      |We handle it by    |
+ * |   |          |a cache inhibited load/store|        |manually reading   |
+ * |   |          |from this thread completed. |        |HMER register.     |
+ * |   |          |If hypervisor               |        |                   |
+ * |   |          |intends to use this bit, it |        |                   |
+ * |   |          |is responsible for clearing |        |                   |
+ * |   |          |it before performing the    |        |                   |
+ * |   |          |xscom operation.            |        |                   |
+ * |   |          |NOTE: this bit should always|        |                   |
+ * |   |          |be masked in HMEER          |        |                   |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |10 |reserved  |reserved                    |n/a     |n/a                |
+ * |---+----------+----------------------------+--------+-------------------|
+ * |11 |proc_recv_|Processor recovery occurred |Yes     |Log message and    |
+ * |   |again     |again before bit2 or bit3   |        |continue working.  |
| + * | | |was cleared | | | + * |---+----------+----------------------------+--------+-------------------| + * |12-|reserved |was temperature sensor |n/a |n/a | + * |15 | |passed the critical point on| | | + * | | |the way up | | | + * |---+----------+----------------------------+--------+-------------------| + * |16 | |SCOM has set a reserved FIR |No |n/a | + * | |scom_fir_h|bit to cause recovery | | | + * | |m | | | | + * |---+----------+----------------------------+--------+-------------------| + * |17 |trig_fir_h|Debug trigger has set a |No |n/a | + * | |mi |reserved FIR bit to cause | | | + * | | |recovery | | | + * |---+----------+----------------------------+--------+-------------------| + * |18 |reserved |reserved |n/a |n/a | + * |---+----------+----------------------------+--------+-------------------| + * |19 |reserved |reserved |n/a |n/a | + * |---+----------+----------------------------+--------+-------------------| + * |20 |hyp_resour|A hypervisor resource error |y |Raise attn from | + * | |ce_err |occurred: data parity error | |sapphire resulting | + * | | |on, SPRC0:3; SPR_Modereg or | |xstop. | + * | | |HMEER. | | | + * | | |Note: this bit will cause an| | | + * | | |check_stop when (HV=1, PR=0 | | | + * | | |and EE=0) | | | + * |---+----------+----------------------------+--------+-------------------| + * |21-| |if bit 8 is active, the |No |We handle it by | + * |23 |xscom_stat|reason will be detailed in | |Manually reading | + * | |us |these bits. see chapter 11.1| |HMER register. | + * | | |This bits are information | | | + * | | |only and always masked | | | + * | | |(mask = '0') | | | + * | | |If hypervisor intends to use| | | + * | | |this bit, it is responsible | | | + * | | |for clearing it before | | | + * | | |performing the xscom | | | + * | | |operation. | | | + * |---+----------+----------------------------+--------+-------------------| + * |24-|Not |Not implemented |n/a |n/a | + * |63 |implemente| | | | + * | |d | | | | + * +-- +----------+----------------------------+--------+-------------------+ + * + * Above HMER bits can be enabled/disabled by modifying + * SPR_HMEER_HMI_ENABLE_MASK #define in include/processor.h + * If you modify support for any of the bits listed above, please make sure + * you change the above table to refelct that. + * + * NOTE: Per Dave Larson, never enable 8,9,21-23 + */ + +/* make compiler happy with a prototype */ +void handle_hmi(struct stack_frame *stack); + +void handle_hmi(struct stack_frame *stack) +{ + uint64_t hmer, orig_hmer; + bool assert = false; + + orig_hmer = hmer = mfspr(SPR_HMER); + printf("HMI: Received HMI interrupt: HMER = 0x%016llx\n", hmer); + if (hmer & (SPR_HMER_PROC_RECV_DONE + | SPR_HMER_PROC_RECV_ERROR_MASKED)) { + hmer &= ~(SPR_HMER_PROC_RECV_DONE + | SPR_HMER_PROC_RECV_ERROR_MASKED); + printf("HMI: Processor recovery Done.\n"); + } + if (hmer & SPR_HMER_PROC_RECV_AGAIN) { + hmer &= ~SPR_HMER_PROC_RECV_AGAIN; + printf("HMI: Processor recovery occurred again before" + "bit2 was cleared\n"); + } + /* Assert if we see malfunction alert, we can not continue. */ + if (hmer & SPR_HMER_MALFUNCTION_ALERT) { + hmer &= ~SPR_HMER_MALFUNCTION_ALERT; + assert = true; + } + + /* Assert if we see Hypervisor resource error, we can not continue. */ + if (hmer & SPR_HMER_HYP_RESOURCE_ERR) { + hmer &= ~SPR_HMER_HYP_RESOURCE_ERR; + assert = true; + } + + /* + * Assert for now for all TOD errors. In future we need to decode + * TFMR and take corrective action wherever required. 
+/* Called from head.S, thus no prototype */
+void exception_entry(struct stack_frame *stack);
+
+void exception_entry(struct stack_frame *stack)
+{
+	switch(stack->type) {
+	case STACK_ENTRY_MCHECK:
+		handle_machine_check(stack);
+		break;
+	case STACK_ENTRY_HMI:
+		handle_hmi(stack);
+		break;
+	case STACK_ENTRY_SOFTPATCH:
+		/* XXX TODO : Implement softpatch ? */
+		break;
+	}
+}
+
+static int64_t patch_exception(uint64_t vector, uint64_t glue, bool hv)
+{
+	uint64_t iaddr;
+
+	/* Copy over primary exception handler */
+	memcpy((void *)vector, &exc_primary_start,
+	       &exc_primary_end - &exc_primary_start);
+
+	/* Patch branch instruction in primary handler */
+	iaddr = vector + exc_primary_patch_branch;
+	*(uint32_t *)iaddr |= (glue - iaddr) & 0x03fffffc;
+
+	/* Copy over secondary exception handler */
+	memcpy((void *)glue, &exc_secondary_start,
+	       &exc_secondary_end - &exc_secondary_start);
+
+	/* Patch-in the vector number */
+	*(uint32_t *)(glue + exc_secondary_patch_type) |= vector;
+
+	/*
+	 * If machine check, patch GET_STACK to get to the MC stack
+	 * instead of the normal stack.
+	 *
+	 * To simplify the arithmetic involved I rely on the fact
+	 * that the base of all CPU stacks is 64k aligned and that our
+	 * stack size is < 32k, which means that the "addi" instruction
+	 * used in GET_STACK() is always using a small (<32k) positive
+	 * offset, which we can then easily fixup with a simple addition
+	 */
+	BUILD_ASSERT(STACK_SIZE < 0x8000);
+	BUILD_ASSERT(!(CPU_STACKS_BASE & 0xffff));
+
+	if (vector == 0x200) {
+		/*
+		 * The addi we try to patch is the 3rd instruction
+		 * of GET_STACK(). If you change the macro, you must
+		 * update this code
+		 */
+		iaddr = glue + exc_secondary_patch_stack + 8;
+		*(uint32_t *)iaddr += MC_STACK_SIZE;
+	}
+
+	/* Standard exception ? All done */
+	if (!hv)
+		goto flush;
+
+	/* HV exception, change the SRRs to HSRRs and rfid to hrfid
+	 *
+	 * The magic is that mfspr/mtspr of SRR can be turned into the
+	 * equivalent HSRR version by OR'ing 0x4800. For rfid to hrfid
+	 * we OR 0x200.
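+	 *
+	 * For instance, SRR0 is SPR 26 and HSRR0 is SPR 314. The two
+	 * 5-bit halves of the SPR number are swapped in the mfspr/mtspr
+	 * encoding, so the difference (314 - 26 = 288 = 9 << 5) lands
+	 * in the instruction at bit 11, i.e. 9 << 11 = 0x4800. Likewise
+	 * rfid (0x4c000024) and hrfid (0x4c000224) differ by exactly
+	 * 0x200.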
+ */ + *(uint32_t *)(glue + exc_secondary_patch_mfsrr0) |= 0x4800; + *(uint32_t *)(glue + exc_secondary_patch_mfsrr1) |= 0x4800; + *(uint32_t *)(glue + exc_secondary_patch_mtsrr0) |= 0x4800; + *(uint32_t *)(glue + exc_secondary_patch_mtsrr1) |= 0x4800; + *(uint32_t *)(glue + exc_secondary_patch_rfid) |= 0x200; + + flush: + /* On P7 and later all we need is : */ + sync_icache(); + + return OPAL_SUCCESS; +} + +static int64_t opal_register_exc_handler(uint64_t opal_exception, + uint64_t handler_address, + uint64_t glue_cache_line) +{ + switch(opal_exception) { + case OPAL_MACHINE_CHECK_HANDLER: + client_mc_address = handler_address; + return patch_exception(0x200, glue_cache_line, false); + case OPAL_HYPERVISOR_MAINTENANCE_HANDLER: + return patch_exception(0xe60, glue_cache_line, true); +#if 0 /* We let Linux handle softpatch */ + case OPAL_SOFTPATCH_HANDLER: + return patch_exception(0x1500, glue_cache_line, true); +#endif + default: + break; + } + return OPAL_PARAMETER; +} +opal_call(OPAL_REGISTER_OPAL_EXCEPTION_HANDLER, opal_register_exc_handler, 3); + diff --git a/core/fast-reboot.c b/core/fast-reboot.c new file mode 100644 index 0000000..49b80b6 --- /dev/null +++ b/core/fast-reboot.c @@ -0,0 +1,346 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <cpu.h> +#include <fsp.h> +#include <psi.h> +#include <opal.h> +#include <xscom.h> +#include <interrupts.h> +#include <cec.h> +#include <timebase.h> +#include <memory.h> +#include <pci.h> +#include <chip.h> + +/* + * To get control of all threads, we sreset them via XSCOM after + * patching the 0x100 vector. This will work as long as the target + * HRMOR is 0. If Linux ever uses HRMOR, we'll have to consider + * a more messy approach. + * + * The SCOM register we want is called "Core RAS Control" in the doc + * and EX0.EC.PC.TCTL_GENERATE#0.TCTL.DIRECT_CONTROLS in the SCOM list + * + * Bits in there change from CPU rev to CPU rev but the bit we care + * about, bit 60 "sreset_request" appears to have stuck to the same + * place in both P7 and P7+. The register also has the same SCOM + * address + */ +#define EX0_TCTL_DIRECT_CONTROLS0 0x08010400 +#define EX0_TCTL_DIRECT_CONTROLS1 0x08010440 +#define EX0_TCTL_DIRECT_CONTROLS2 0x08010480 +#define EX0_TCTL_DIRECT_CONTROLS3 0x080104c0 +#define TCTL_DC_SRESET_REQUEST PPC_BIT(60) + +/* Flag tested by the OPAL entry code */ +uint8_t reboot_in_progress; +static struct cpu_thread *resettor, *resettee; + +static void flush_caches(void) +{ + uint64_t base = SKIBOOT_BASE; + uint64_t end = base + SKIBOOT_SIZE; + + /* Not sure what the effect of sreset is on cores, so let's + * shoot a series of dcbf's on all cachelines that make up + * our core memory just in case... 
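+	 *
+	 * (the 128-byte stride below matches the P7/P8 cache line size)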
+ */ + while(base < end) { + asm volatile("dcbf 0,%0" : : "r" (base) : "memory"); + base += 128; + } + sync(); +} + +static bool do_reset_core_p7(struct cpu_thread *cpu) +{ + uint32_t xscom_addr, chip; + uint64_t ctl; + int rc; + + /* Add the Core# */ + xscom_addr = EX0_TCTL_DIRECT_CONTROLS0; + xscom_addr |= ((cpu->pir >> 2) & 7) << 24; + + chip = pir_to_chip_id(cpu->pir); + + ctl = TCTL_DC_SRESET_REQUEST; + rc = xscom_write(chip, xscom_addr, ctl); + rc |= xscom_write(chip, xscom_addr + 0x40, ctl); + rc |= xscom_write(chip, xscom_addr + 0x80, ctl); + rc |= xscom_write(chip, xscom_addr + 0xc0, ctl); + if (rc) { + prerror("RESET: Error %d resetting CPU 0x%04x\n", + rc, cpu->pir); + return false; + } + return true; +} + +static void fast_reset_p7(void) +{ + struct cpu_thread *cpu; + + resettee = this_cpu(); + resettor = NULL; + + /* Pick up a candidate resettor. We do that before we flush + * the caches + */ + for_each_cpu(cpu) { + /* + * Some threads might still be in skiboot. + * + * But because we deal with entire cores and we don't want + * to special case things, we are just going to reset them + * too making the assumption that this is safe, they are + * holding no locks. This can only be true if they don't + * have jobs scheduled which is hopefully the case. + */ + if (cpu->state != cpu_state_os && + cpu->state != cpu_state_active) + continue; + + /* + * Only hit cores and only if they aren't on the same core + * as ourselves + */ + if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || + cpu->pir & 0x3) + continue; + + /* Pick up one of those guys as our "resettor". It will be + * in charge of resetting this CPU. We avoid resetting + * ourselves, not sure how well it would do with SCOM + */ + resettor = cpu; + break; + } + + if (!resettor) { + printf("RESET: Can't find a resettor !\n"); + return; + } + printf("RESET: Resetting from 0x%04x, resettor 0x%04x\n", + this_cpu()->pir, resettor->pir); + + printf("RESET: Flushing caches...\n"); + + /* Is that necessary ? */ + flush_caches(); + + /* Reset everybody except self and except resettor */ + for_each_cpu(cpu) { + if (cpu->state != cpu_state_os && + cpu->state != cpu_state_active) + continue; + if (cpu_get_thread0(cpu) == cpu_get_thread0(this_cpu()) || + cpu->pir & 0x3) + continue; + if (cpu_get_thread0(cpu) == cpu_get_thread0(resettor)) + continue; + + printf("RESET: Resetting CPU 0x%04x...\n", cpu->pir); + + if (!do_reset_core_p7(cpu)) + return; + } + + /* Reset the resettor last because it's going to kill me ! 
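+	 * (once it comes back up through fast_reboot() it will sreset
+	 * us, the resettee, to finish the job)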
	 */
+	printf("RESET: Resetting CPU 0x%04x...\n", resettor->pir);
+	if (!do_reset_core_p7(resettor))
+		return;
+
+	/* Don't return */
+	for (;;)
+		;
+}
+
+void fast_reset(void)
+{
+	uint32_t pvr = mfspr(SPR_PVR);
+	extern uint32_t fast_reset_patch_start;
+	extern uint32_t fast_reset_patch_end;
+	uint32_t *dst, *src;
+
+	printf("RESET: Fast reboot request !\n");
+
+	/* XXX We need a way to ensure that no other CPU is in skiboot
+	 * holding locks (via the OPAL APIs) and if they are, we need
+	 * them to get out
+	 */
+	reboot_in_progress = 1;
+	time_wait_ms(200);
+
+	/* Copy reset trampoline */
+	printf("RESET: Copying reset trampoline...\n");
+	src = &fast_reset_patch_start;
+	dst = (uint32_t *)0x100;
+	while(src < &fast_reset_patch_end)
+		*(dst++) = *(src++);
+	sync_icache();
+
+	switch(PVR_TYPE(pvr)) {
+	case PVR_TYPE_P7:
+	case PVR_TYPE_P7P:
+		fast_reset_p7();
+	}
+}
+
+static void cleanup_cpu_state(void)
+{
+	if (cpu_is_thread0(this_cpu())) {
+		cleanup_tlb();
+		init_shared_sprs();
+	}
+	init_replicated_sprs();
+	reset_cpu_icp();
+}
+
+#ifdef FAST_REBOOT_CLEARS_MEMORY
+static void fast_mem_clear(uint64_t start, uint64_t end)
+{
+	printf("MEMORY: Clearing %llx..%llx\n", start, end);
+
+	while(start < end) {
+		asm volatile("dcbz 0,%0" : : "r" (start) : "memory");
+		start += 128;
+	}
+}
+
+static void memory_reset(void)
+{
+	struct address_range *i;
+	uint64_t skistart = SKIBOOT_BASE;
+	uint64_t skiend = SKIBOOT_BASE + SKIBOOT_SIZE;
+
+	printf("MEMORY: Clearing ...\n");
+
+	list_for_each(&address_ranges, i, list) {
+		uint64_t start = cleanup_addr(i->arange->start);
+		uint64_t end = cleanup_addr(i->arange->end);
+
+		if (start >= skiend || end <= skistart)
+			fast_mem_clear(start, end);
+		else {
+			if (start < skistart)
+				fast_mem_clear(start, skistart);
+			if (end > skiend)
+				fast_mem_clear(skiend, end);
+		}
+	}
+}
+#endif /* FAST_REBOOT_CLEARS_MEMORY */
+
+/* Entry from asm after a fast reset */
+void fast_reboot(void);
+
+void fast_reboot(void)
+{
+	static volatile bool fast_boot_release;
+	struct cpu_thread *cpu;
+
+	printf("INIT: CPU PIR 0x%04x reset in\n", this_cpu()->pir);
+
+	/* If this CPU was chosen as the resettor, it must reset the
+	 * resettee (the one that initiated the whole process)
+	 */
+	if (this_cpu() == resettor)
+		do_reset_core_p7(resettee);
+
+	/* Are we the original boot CPU ? If not, we spin waiting
+	 * for a release signal from the boot CPU, then we clean
+	 * ourselves up and go process jobs.
+	 */
+	if (this_cpu() != boot_cpu) {
+		this_cpu()->state = cpu_state_present;
+		while (!fast_boot_release) {
+			smt_very_low();
+			sync();
+		}
+		smt_medium();
+		cleanup_cpu_state();
+		__secondary_cpu_entry();
+	}
+
+	/* We are the original boot CPU, wait for secondaries to
+	 * be captured
+	 */
+	for_each_cpu(cpu) {
+		if (cpu == this_cpu())
+			continue;
+
+		/* XXX Add a callin timeout ? */
+		while (cpu->state != cpu_state_present) {
+			smt_very_low();
+			sync();
+		}
+		smt_medium();
+	}
+
+	printf("INIT: Releasing secondaries...\n");
+
+	/* Release everybody */
+	fast_boot_release = true;
+	sync();
+
+	/* Wait for them to respond */
+	for_each_cpu(cpu) {
+		if (cpu == this_cpu())
+			continue;
+
+		/* XXX Add a callin timeout ?
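+		 * (right now a secondary that never reacts leaves us
+		 * spinning here forever)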
*/ + while (cpu->state == cpu_state_present) { + smt_very_low(); + sync(); + } + } + + printf("INIT: All done, resetting everything else...\n"); + + /* Clear release flag for next time */ + fast_boot_release = false; + reboot_in_progress = 0; + + /* Cleanup ourselves */ + cleanup_cpu_state(); + + /* Set our state to active */ + this_cpu()->state = cpu_state_active; + + /* Poke the consoles (see comments in the code there) */ + fsp_console_reset(); + + /* Reset/EOI the PSI interrupt */ + psi_irq_reset(); + + /* Remove all PCI devices */ + pci_reset(); + + /* Reset IO Hubs */ + cec_reset(); + + /* Re-Initialize all discovered PCI slots */ + pci_init_slots(); + + /* Clear memory */ +#ifdef FAST_REBOOT_CLEARS_MEMORY + memory_reset(); +#endif + load_and_boot_kernel(true); +} diff --git a/core/fdt.c b/core/fdt.c new file mode 100644 index 0000000..62e60fc --- /dev/null +++ b/core/fdt.c @@ -0,0 +1,208 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <stdarg.h> +#include <libfdt.h> +#include <device.h> +#include <cpu.h> +#include <memory.h> +#include <opal.h> +#include <interrupts.h> +#include <fsp.h> +#include <cec.h> +#include <vpd.h> +#include <ccan/str/str.h> + +static int fdt_error; +static void *fdt; + +#undef DEBUG_FDT + +static void __save_err(int err, const char *str) +{ +#ifdef DEBUG_FDT + printf("FDT: rc: %d from \"%s\"\n", err, str); +#endif + if (err && !fdt_error) { + prerror("FDT: Error %d from \"%s\"\n", err, str); + fdt_error = err; + } +} + +#define save_err(...) 
__save_err(__VA_ARGS__, #__VA_ARGS__)
+
+static void dt_property_cell(const char *name, u32 cell)
+{
+	save_err(fdt_property_cell(fdt, name, cell));
+}
+
+static void dt_begin_node(const char *name, uint32_t phandle)
+{
+	save_err(fdt_begin_node(fdt, name));
+
+	/*
+	 * We add both the new style "phandle" and the legacy
+	 * "linux,phandle" properties
+	 */
+	dt_property_cell("linux,phandle", phandle);
+	dt_property_cell("phandle", phandle);
+}
+
+static void dt_property(const char *name, const void *val, size_t size)
+{
+	save_err(fdt_property(fdt, name, val, size));
+}
+
+static void dt_end_node(void)
+{
+	save_err(fdt_end_node(fdt));
+}
+
+static void dump_fdt(void)
+{
+#ifdef DEBUG_FDT
+	int i, off, depth, err;
+
+	printf("Device tree %u@%p\n", fdt_totalsize(fdt), fdt);
+
+	err = fdt_check_header(fdt);
+	if (err) {
+		prerror("fdt_check_header: %s\n", fdt_strerror(err));
+		return;
+	}
+	printf("fdt_check_header passed\n");
+
+	printf("fdt_num_mem_rsv = %u\n", fdt_num_mem_rsv(fdt));
+	for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+		u64 addr, size;
+
+		err = fdt_get_mem_rsv(fdt, i, &addr, &size);
+		if (err) {
+			printf(" ERR %s\n", fdt_strerror(err));
+			return;
+		}
+		printf("  mem_rsv[%i] = %lu@%#lx\n",
+		       i, (long)size, (long)addr);
+	}
+
+	for (off = fdt_next_node(fdt, 0, &depth);
+	     off > 0;
+	     off = fdt_next_node(fdt, off, &depth)) {
+		int len;
+		const char *name;
+
+		name = fdt_get_name(fdt, off, &len);
+		if (!name) {
+			prerror("fdt: offset %i no name!\n", off);
+			return;
+		}
+		printf("name: %s [%u]\n", name, off);
+	}
+#endif
+}
+
+static void flatten_dt_node(const struct dt_node *root)
+{
+	const struct dt_node *i;
+	const struct dt_property *p;
+
+#ifdef DEBUG_FDT
+	printf("FDT: node: %s\n", root->name);
+#endif
+
+	list_for_each(&root->properties, p, list) {
+		if (strstarts(p->name, DT_PRIVATE))
+			continue;
+#ifdef DEBUG_FDT
+		printf("FDT: prop: %s size: %ld\n", p->name, p->len);
+#endif
+		dt_property(p->name, p->prop, p->len);
+	}
+
+	list_for_each(&root->children, i, list) {
+		dt_begin_node(i->name, i->phandle);
+		flatten_dt_node(i);
+		dt_end_node();
+	}
+}
+
+static void create_dtb_reservemap(const struct dt_node *root)
+{
+	uint64_t base, size;
+	const uint64_t *ranges;
+	const struct dt_property *prop;
+	int i;
+
+	/* Duplicate the reserved-ranges property into the fdt reservemap */
+	prop = dt_find_property(root, "reserved-ranges");
+	if (prop) {
+		ranges = (const void *)prop->prop;
+
+		for (i = 0; i < prop->len / (sizeof(uint64_t) * 2); i++) {
+			base = *(ranges++);
+			size = *(ranges++);
+			save_err(fdt_add_reservemap_entry(fdt, base, size));
+		}
+	}
+
+	save_err(fdt_finish_reservemap(fdt));
+}
+
+void *create_dtb(const struct dt_node *root)
+{
+	size_t len = DEVICE_TREE_MAX_SIZE;
+	uint32_t old_last_phandle = last_phandle;
+
+	do {
+		if (fdt)
+			free(fdt);
+		last_phandle = old_last_phandle;
+		fdt_error = 0;
+		fdt = malloc(len);
+		if (!fdt) {
+			prerror("dtb: could not malloc %lu\n", (long)len);
+			return NULL;
+		}
+
+		fdt_create(fdt, len);
+
+		create_dtb_reservemap(root);
+
+		/* Open root node */
+		dt_begin_node(root->name, root->phandle);
+
+		/* Flatten our live tree into the blob */
+		flatten_dt_node(root);
+
+		/* Close root node */
+		dt_end_node();
+
+		save_err(fdt_finish(fdt));
+
+		if (!fdt_error)
+			break;
+
+		len *= 2;
+	} while (fdt_error == -FDT_ERR_NOSPACE);
+
+	dump_fdt();
+
+	if (fdt_error) {
+		prerror("dtb: error %s\n", fdt_strerror(fdt_error));
+		return NULL;
+	}
+	return fdt;
+}
diff --git a/core/flash-nvram.c b/core/flash-nvram.c
new file mode 100644
index 0000000..7e261b1
---
/dev/null +++ b/core/flash-nvram.c @@ -0,0 +1,76 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <skiboot.h> +#include <device.h> +#include <console.h> +#include <opal.h> +#include <platform.h> +#include <libflash/libflash.h> + +static struct flash_chip *fl_nv_chip; +static uint32_t fl_nv_start, fl_nv_size; + +static int flash_nvram_info(uint32_t *total_size) +{ + if (!fl_nv_chip) + return OPAL_HARDWARE; + *total_size = fl_nv_size; + return OPAL_SUCCESS; +} + +static int flash_nvram_start_read(void *dst, uint32_t src, uint32_t len) +{ + int rc; + + if ((src + len) > fl_nv_size) { + prerror("FLASH_NVRAM: read out of bound (0x%x,0x%x)\n", + src, len); + return OPAL_PARAMETER; + } + rc = flash_read(fl_nv_chip, fl_nv_start + src, dst, len); + if (rc) + return rc; + nvram_read_complete(true); + return 0; +} + +static int flash_nvram_write(uint32_t dst, void *src, uint32_t len) +{ + /* TODO: When we have async jobs for PRD, turn this into one */ + + if ((dst + len) > fl_nv_size) { + prerror("FLASH_NVRAM: write out of bound (0x%x,0x%x)\n", + dst, len); + return OPAL_PARAMETER; + } + return flash_smart_write(fl_nv_chip, fl_nv_start + dst, src, len); +} + +int flash_nvram_init(struct flash_chip *chip, uint32_t start, uint32_t size) +{ + fl_nv_chip = chip; + fl_nv_start = start; + fl_nv_size = size; + + platform.nvram_info = flash_nvram_info; + platform.nvram_start_read = flash_nvram_start_read; + platform.nvram_write = flash_nvram_write; + + return 0; +} + diff --git a/core/hostservices.c b/core/hostservices.c new file mode 100644 index 0000000..85e62e3 --- /dev/null +++ b/core/hostservices.c @@ -0,0 +1,826 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdint.h> +#include <stdlib.h> +#include <stdio.h> + +#include <lock.h> +#include <device.h> +#include <compiler.h> +#include <hostservices.h> +#include <mem_region.h> +#include <xscom.h> +#include <fsp.h> +#include <chip.h> +#include <console.h> +#include <mem-map.h> +#include <timebase.h> + +#define HOSTBOOT_RUNTIME_INTERFACE_VERSION 1 + +struct host_interfaces { + /** Interface version. */ + uint64_t interface_version; + + /** Put a string to the console. */ + void (*puts)(const char*); + /** Critical failure in runtime execution. */ + void (*assert)(void); + + /** OPTIONAL. Hint to environment that the page may be executed. 
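+	 * (skiboot does not implement this: the hinterface table below
+	 * leaves it NULL)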
*/ + int (*set_page_execute)(void*); + + /** malloc */ + void *(*malloc)(size_t); + /** free */ + void (*free)(void*); + /** realloc */ + void *(*realloc)(void*, size_t); + + /** sendErrorLog + * @param[in] plid Platform Log identifier + * @param[in] data size in bytes + * @param[in] pointer to data + * @return 0 on success else error code + */ + int (*send_error_log)(uint32_t,uint32_t,void *); + + /** Scan communication read + * @param[in] chip_id (based on devtree defn) + * @param[in] address + * @param[in] pointer to 8-byte data buffer + * @return 0 on success else return code + */ + int (*scom_read)(uint64_t, uint64_t, void*); + + /** Scan communication write + * @param[in] chip_id (based on devtree defn) + * @param[in] address + * @param[in] pointer to 8-byte data buffer + * @return 0 on success else return code + */ + int (*scom_write)(uint64_t, uint64_t, const void *); + + /** lid_load + * Load a LID from PNOR, FSP, etc. + * + * @param[in] LID number. + * @param[out] Allocated buffer for LID. + * @param[out] Size of LID (in bytes). + * + * @return 0 on success, else RC. + */ + int (*lid_load)(uint32_t lid, void **buf, size_t *len); + + /** lid_unload + * Release memory from previously loaded LID. + * + * @param[in] Allocated buffer for LID to release. + * + * @return 0 on success, else RC. + */ + int (*lid_unload)(void *buf); + + /** Get the address of a reserved memory region by its devtree name. + * + * @param[in] Devtree name (ex. "ibm,hbrt-vpd-image") + * @return physical address of region (or NULL). + **/ + uint64_t (*get_reserved_mem)(const char*); + + /** + * @brief Force a core to be awake, or clear the force + * @param[in] i_core Core to wake up (pid) + * @param[in] i_mode 0=force awake + * 1=clear force + * 2=clear all previous forces + * @return rc non-zero on error + */ + int (*wakeup)( uint32_t i_core, uint32_t i_mode ); + + /** + * @brief Delay/sleep for at least the time given + * @param[in] seconds + * @param[in] nano seconds + */ + void (*nanosleep)(uint64_t i_seconds, uint64_t i_nano_seconds); + + // Reserve some space for future growth. + void (*reserved[32])(void); +}; + +struct runtime_interfaces { + /** Interface version. */ + uint64_t interface_version; + + /** Execute CxxTests that may be contained in the image. + * + * @param[in] - Pointer to CxxTestStats structure for results reporting. + */ + void (*cxxtestExecute)(void *); + /** Get a list of lids numbers of the lids known to HostBoot + * + * @param[out] o_num - the number of lids in the list + * @return a pointer to the list + */ + const uint32_t * (*get_lid_list)(size_t * o_num); + + /** Load OCC Image and common data into mainstore, also setup OCC BARSs + * + * @param[in] i_homer_addr_phys - The physical mainstore address of the + * start of the HOMER image + * @param[in] i_homer_addr_va - Virtual memory address of the HOMER image + * @param[in] i_common_addr_phys - The physical mainstore address of the + * OCC common area. 
+	 * @param[in] i_common_addr_va - Virtual memory address of the common area
+	 * @param[in] i_chip - The HW chip id (XSCOM chip ID)
+	 * @return 0 on success else return code
+	 */
+	int(*loadOCC)(uint64_t i_homer_addr_phys,
+		      uint64_t i_homer_addr_va,
+		      uint64_t i_common_addr_phys,
+		      uint64_t i_common_addr_va,
+		      uint64_t i_chip);
+
+	/** Start OCC on all chips, by module
+	 *
+	 *  @param[in] i_chip - Array of functional HW chip ids
+	 *  @Note The caller must include a complete module's worth of chips
+	 *  @param[in] i_num_chips - Number of chips in the array
+	 *  @return 0 on success else return code
+	 */
+	int (*startOCCs)(uint64_t* i_chip,
+			 size_t i_num_chips);
+
+	/** Stop OCCs: hold the OCCs in reset
+	 *
+	 *  @param[in] i_chip - Array of functional HW chip ids
+	 *  @Note The caller must include a complete module's worth of chips
+	 *  @param[in] i_num_chips - Number of chips in the array
+	 *  @return 0 on success else return code
+	 */
+	int (*stopOCCs)(uint64_t* i_chip,
+			size_t i_num_chips);
+
+	/* Reserve some space for future growth. */
+	void (*reserved[32])(void);
+};
+
+static struct runtime_interfaces *hservice_runtime;
+
+static char *hbrt_con_buf = (char *)HBRT_CON_START;
+static size_t hbrt_con_pos;
+static bool hbrt_con_wrapped;
+
+#define HBRT_CON_IN_LEN		0
+#define HBRT_CON_OUT_LEN	(HBRT_CON_LEN - HBRT_CON_IN_LEN)
+
+struct memcons hbrt_memcons __section(".data.memcons") = {
+	.magic		= MEMCONS_MAGIC,
+	.obuf_phys	= HBRT_CON_START,
+	.ibuf_phys	= HBRT_CON_START + HBRT_CON_OUT_LEN,
+	.obuf_size	= HBRT_CON_OUT_LEN,
+	.ibuf_size	= HBRT_CON_IN_LEN,
+};
+
+static void hservice_putc(char c)
+{
+	uint32_t opos;
+
+	hbrt_con_buf[hbrt_con_pos++] = c;
+	if (hbrt_con_pos >= HBRT_CON_OUT_LEN) {
+		hbrt_con_pos = 0;
+		hbrt_con_wrapped = true;
+	}
+
+	/*
+	 * We must always re-generate memcons.out_pos because
+	 * under some circumstances, the console script will
+	 * use a broken putmemproc that does RMW on the full
+	 * 8 bytes containing out_pos and in_prod, thus corrupting
+	 * out_pos
+	 */
+	opos = hbrt_con_pos;
+	if (hbrt_con_wrapped)
+		opos |= MEMCONS_OUT_POS_WRAP;
+	lwsync();
+	hbrt_memcons.out_pos = opos;
+}
+
+static void hservice_puts(const char *str)
+{
+	char c;
+
+	while((c = *(str++)) != 0)
+		hservice_putc(c);
+	hservice_putc('\n');
+}
+
+static void hservice_mark(void)
+{
+	hservice_puts("--------------------------------------------------"
+		      "--------------------------------------------------\n");
+}
+
+static void hservice_assert(void)
+{
+	prerror("HBRT: Assertion from hostservices\n");
+	abort();
+}
+
+static void *hservice_malloc(size_t size)
+{
+	return malloc(size);
+}
+
+static void hservice_free(void *ptr)
+{
+	free(ptr);
+}
+
+static void *hservice_realloc(void *ptr, size_t size)
+{
+	return realloc(ptr, size);
+}
+
+struct hbrt_elog_ent {
+	void *buf;
+	unsigned int size;
+	unsigned int plid;
+	struct list_node link;
+};
+static LIST_HEAD(hbrt_elogs);
+static struct lock hbrt_elog_lock = LOCK_UNLOCKED;
+static bool hbrt_elog_sending;
+static void hservice_start_elog_send(void);
+
+static void hservice_elog_write_complete(struct fsp_msg *msg)
+{
+	struct hbrt_elog_ent *ent = msg->user_data;
+
+	lock(&hbrt_elog_lock);
+	printf("HBRT: Completed send of PLID 0x%08x\n", ent->plid);
+	hbrt_elog_sending = false;
+	fsp_tce_unmap(PSI_DMA_HBRT_LOG_WRITE_BUF,
+		      PSI_DMA_HBRT_LOG_WRITE_BUF_SZ);
+	free(ent->buf);
+	free(ent);
+	fsp_freemsg(msg);
+	hservice_start_elog_send();
+	unlock(&hbrt_elog_lock);
+}
+
+static void hservice_start_elog_send(void)
+{
+	struct fsp_msg *msg;
+	struct hbrt_elog_ent
*ent; + + again: + if (list_empty(&hbrt_elogs)) + return; + ent = list_pop(&hbrt_elogs, struct hbrt_elog_ent, link); + + hbrt_elog_sending = true; + + printf("HBRT: Starting send of PLID 0x%08x\n", ent->plid); + + fsp_tce_map(PSI_DMA_HBRT_LOG_WRITE_BUF, ent->buf, + PSI_DMA_HBRT_LOG_WRITE_BUF_SZ); + + msg = fsp_mkmsg(FSP_CMD_WRITE_SP_DATA, 6, FSP_DATASET_HBRT_BLOB, + 0, 0, 0, PSI_DMA_HBRT_LOG_WRITE_BUF, + ent->size); + + if (!msg) { + prerror("HBRT: Failed to create error msg log to FSP\n"); + goto error; + } + msg->user_data = ent; + if (!fsp_queue_msg(msg, hservice_elog_write_complete)) + return; + prerror("FSP: Error queueing elog update\n"); + error: + if (msg) + fsp_freemsg(msg); + fsp_tce_unmap(PSI_DMA_HBRT_LOG_WRITE_BUF, + PSI_DMA_HBRT_LOG_WRITE_BUF_SZ); + free(ent->buf); + free(ent); + hbrt_elog_sending = false; + goto again; +} + +static int hservice_send_error_log(uint32_t plid, uint32_t dsize, void *data) +{ + struct hbrt_elog_ent *ent; + void *abuf; + + printf("HBRT: Error log generated with plid 0x%08x\n", plid); + + /* We only know how to send error logs to FSP */ + if (!fsp_present()) { + prerror("HBRT: Warning, error log from HBRT discarded !\n"); + return OPAL_UNSUPPORTED; + } + if (dsize > PSI_DMA_HBRT_LOG_WRITE_BUF_SZ) { + prerror("HBRT: Warning, error log from HBRT too big (%d) !\n", + dsize); + dsize = PSI_DMA_HBRT_LOG_WRITE_BUF_SZ; + } + + lock(&hbrt_elog_lock); + + /* Create and populate a tracking structure */ + ent = zalloc(sizeof(struct hbrt_elog_ent)); + if (!ent) { + unlock(&hbrt_elog_lock); + return OPAL_NO_MEM; + } + + /* Grab a 4k aligned page */ + abuf = memalign(0x1000, PSI_DMA_HBRT_LOG_WRITE_BUF_SZ); + if (!abuf) { + free(ent); + unlock(&hbrt_elog_lock); + return OPAL_NO_MEM; + } + memset(abuf, 0, PSI_DMA_HBRT_LOG_WRITE_BUF_SZ); + memcpy(abuf, data, dsize); + ent->buf = abuf; + ent->size = dsize; + ent->plid = plid; + list_add_tail(&hbrt_elogs, &ent->link); + if (!hbrt_elog_sending) + hservice_start_elog_send(); + unlock(&hbrt_elog_lock); + + return 0; +} + +static int hservice_scom_read(uint64_t chip_id, uint64_t addr, void *buf) +{ + return xscom_read(chip_id, addr, buf); +} + +static int hservice_scom_write(uint64_t chip_id, uint64_t addr, + const void *buf) +{ + uint64_t val; + + memcpy(&val, buf, sizeof(val)); + return xscom_write(chip_id, addr, val); +} + +static int hservice_lid_load(uint32_t lid, void **buf, size_t *len) +{ + int rc; + static void *lid_cache; + static size_t lid_cache_len; + static uint32_t lid_cache_id; + + printf("HBRT: LID load request for 0x%08x\n", lid); + + /* Adjust LID side first or we get a cache mismatch */ + lid = fsp_adjust_lid_side(lid); + + /* Check for cache */ + if (lid_cache && lid_cache_id == lid) { + *buf = lid_cache; + *len = lid_cache_len; + printf("HBRT: Serviced from cache, len=0x%lx\n", lid_cache_len); + return 0; + } + + /* Cache mismatch, discard old one */ + if (lid_cache) { + printf("HBRT: Cache mismatch, discarding old 0x%08x\n", + lid_cache_id); + free(lid_cache); + lid_cache = NULL; + } + + /* Allocate a new buffer and load the LID into it */ + *buf = malloc(HBRT_LOAD_LID_SIZE); + *len = HBRT_LOAD_LID_SIZE; + rc = fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid, 0, *buf, len); + if (rc != 0) + /* Take advantage of realloc corner case here. 
 */
+		*len = 0;
+	*buf = realloc(*buf, *len);
+
+	/* We managed, let's cache it */
+	if (rc == 0 && *len) {
+		lid_cache = *buf;
+		lid_cache_len = *len;
+		lid_cache_id = lid;
+
+		printf("HBRT: LID 0x%08x successfully loaded and cached"
+		       ", len=0x%lx\n", lid, lid_cache_len);
+	}
+
+	return rc;
+}
+
+static int hservice_lid_unload(void *buf __unused)
+{
+	/* We do nothing as the LID is held in cache */
+	return 0;
+}
+
+static uint64_t hservice_get_reserved_mem(const char *name)
+{
+	struct mem_region *region;
+	uint64_t ret;
+
+	/* We assume it doesn't change after we've unlocked it, but
+	 * the lock ensures the list is safe to walk. */
+	lock(&mem_region_lock);
+	region = find_mem_region(name);
+	ret = region ? region->start : 0;
+	unlock(&mem_region_lock);
+
+	if (!ret)
+		prerror("HBRT: Mem region '%s' not found !\n", name);
+
+	return ret;
+}
+
+static void hservice_nanosleep(uint64_t i_seconds, uint64_t i_nano_seconds)
+{
+	struct timespec ts;
+
+	ts.tv_sec = i_seconds;
+	ts.tv_nsec = i_nano_seconds;
+	nanosleep(&ts, NULL);
+}
+
+static int hservice_set_special_wakeup(struct cpu_thread *cpu)
+{
+	uint64_t val, core_id, poll_target, stamp;
+	int rc;
+
+	/*
+	 * Note: HWP checks for checkstops, but I assume we don't need to
+	 * as we wouldn't be running if one was present
+	 */
+
+	/* Grab core ID once */
+	core_id = pir_to_core_id(cpu->pir);
+
+	/*
+	 * The original HWP reads the XSCOM first but ignores the result
+	 * and error, let's do the same until I know for sure that is
+	 * not necessary
+	 */
+	xscom_read(cpu->chip_id,
+		   XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+		   &val);
+
+	/* Then we write special wakeup */
+	rc = xscom_write(cpu->chip_id,
+			 XSCOM_ADDR_P8_EX_SLAVE(core_id,
+						EX_PM_SPECIAL_WAKEUP_PHYP),
+			 PPC_BIT(0));
+	if (rc) {
+		prerror("HBRT: XSCOM error %d asserting special"
+			" wakeup on 0x%x\n", rc, cpu->pir);
+		return rc;
+	}
+
+	/*
+	 * HWP uses the history for Perf register here, dunno why it uses
+	 * that one instead of the pHyp one, maybe to avoid clobbering it...
+	 *
+	 * In any case, it does that to check for run/nap vs. sleep/winkle/
+	 * other to decide whether to poll on checkstop or not. Since we
+	 * don't deal with checkstop conditions here, we ignore that part.
+	 */
+
+	/*
+	 * Now poll for completion of special wakeup. The HWP is nasty here,
+	 * it will poll at 5ms intervals for up to 200ms. This is not quite
+	 * acceptable for us at runtime, at least not until we have the
+	 * ability to "context switch" HBRT. In practice, because we don't
+	 * winkle, it will never take that long, so we increase the polling
+	 * frequency to 1us per poll. However we do have to keep the same
+	 * timeout.
+	 *
+	 * We don't use time_wait_ms() either for now as we don't want to
+	 * poll the FSP here.
+	 */
+	stamp = mftb();
+	poll_target = stamp + msecs_to_tb(200);
+	val = 0;
+	while (!(val & EX_PM_GP0_SPECIAL_WAKEUP_DONE)) {
+		/* Wait 1 us */
+		hservice_nanosleep(0, 1000);
+
+		/* Read PM state */
+		rc = xscom_read(cpu->chip_id,
+				XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_GP0),
+				&val);
+		if (rc) {
+			prerror("HBRT: XSCOM error %d reading PM state on"
+				" 0x%x\n", rc, cpu->pir);
+			return rc;
+		}
+		/* Check timeout */
+		if (mftb() > poll_target)
+			break;
+	}
+
+	/* Success ? */
+	if (val & EX_PM_GP0_SPECIAL_WAKEUP_DONE) {
+		uint64_t now = mftb();
+		printf("HBRT: Special wakeup complete after %ld us\n",
+		       tb_to_usecs(now - stamp));
+		return 0;
+	}
+
+	/*
+	 * We timed out ...
+	 *
+	 * HWP has a complex workaround for HW255321 which affects
+	 * Murano DD1 and Venice DD1. Ignore that for now.
+	 *
+	 * Instead we just dump some XSCOMs for error logging
+	 */
+	prerror("HBRT: Timeout on special wakeup of 0x%x\n", cpu->pir);
+	prerror("HBRT: PM0 = 0x%016llx\n", val);
+	val = -1;
+	xscom_read(cpu->chip_id,
+		   XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+		   &val);
+	prerror("HBRT: SPC_WKUP = 0x%016llx\n", val);
+	val = -1;
+	xscom_read(cpu->chip_id,
+		   XSCOM_ADDR_P8_EX_SLAVE(core_id,
+					  EX_PM_IDLE_STATE_HISTORY_PHYP),
+		   &val);
+	prerror("HBRT: HISTORY = 0x%016llx\n", val);
+
+	return OPAL_HARDWARE;
+}
+
+static int hservice_clr_special_wakeup(struct cpu_thread *cpu)
+{
+	uint64_t val, core_id;
+	int rc;
+
+	/*
+	 * Note: HWP checks for checkstops, but I assume we don't need to
+	 * as we wouldn't be running if one was present
+	 */
+
+	/* Grab core ID once */
+	core_id = pir_to_core_id(cpu->pir);
+
+	/*
+	 * The original HWP reads the XSCOM first but ignores the result
+	 * and error, let's do the same until I know for sure that is
+	 * not necessary
+	 */
+	xscom_read(cpu->chip_id,
+		   XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+		   &val);
+
+	/* Then we write special wakeup */
+	rc = xscom_write(cpu->chip_id,
+			 XSCOM_ADDR_P8_EX_SLAVE(core_id,
+						EX_PM_SPECIAL_WAKEUP_PHYP), 0);
+	if (rc) {
+		prerror("HBRT: XSCOM error %d deasserting"
+			" special wakeup on 0x%x\n", rc, cpu->pir);
+		return rc;
+	}
+
+	/*
+	 * The original HWP reads the XSCOM again with the comment
+	 * "This puts an inherent delay in the propagation of the reset
+	 * transition"
+	 */
+	xscom_read(cpu->chip_id,
+		   XSCOM_ADDR_P8_EX_SLAVE(core_id, EX_PM_SPECIAL_WAKEUP_PHYP),
+		   &val);
+
+	return 0;
+}
+
+static int hservice_wakeup(uint32_t i_core, uint32_t i_mode)
+{
+	struct cpu_thread *cpu;
+	int rc = OPAL_SUCCESS;
+
+	/*
+	 * Mask out the top nibble of i_core since it may contain
+	 * 0x4 (which we use for XSCOM targeting)
+	 */
+	i_core &= 0x0fffffff;
+
+	/* What do we need to do ? */
+	switch(i_mode) {
+	case 0: /* Assert special wakeup */
+		/* XXX Assume P8 */
+		cpu = find_cpu_by_pir(i_core << 3);
+		if (!cpu)
+			return OPAL_PARAMETER;
+		printf("HBRT: Special wakeup assert for core 0x%x, count=%d\n",
+		       i_core, cpu->hbrt_spec_wakeup);
+		if (cpu->hbrt_spec_wakeup == 0)
+			rc = hservice_set_special_wakeup(cpu);
+		if (rc == 0)
+			cpu->hbrt_spec_wakeup++;
+		return rc;
+	case 1: /* Deassert special wakeup */
+		/* XXX Assume P8 */
+		cpu = find_cpu_by_pir(i_core << 3);
+		if (!cpu)
+			return OPAL_PARAMETER;
+		printf("HBRT: Special wakeup release for core 0x%x, count=%d\n",
+		       i_core, cpu->hbrt_spec_wakeup);
+		if (cpu->hbrt_spec_wakeup == 0) {
+			prerror("HBRT: Special wakeup clear"
+				" on core 0x%x with count=0\n",
+				i_core);
+			return OPAL_WRONG_STATE;
+		}
+		/* What to do with the count on errors ? */
+		cpu->hbrt_spec_wakeup--;
+		if (cpu->hbrt_spec_wakeup == 0)
+			rc = hservice_clr_special_wakeup(cpu);
+		return rc;
+	case 2: /* Clear all special wakeups */
+		printf("HBRT: Special wakeup release for all cores\n");
+		for_each_cpu(cpu) {
+			if (cpu->hbrt_spec_wakeup) {
+				cpu->hbrt_spec_wakeup = 0;
+				/* What to do on errors ?
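+				 * (we zero the count regardless and
+				 * ignore the rc from the clear)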
				 */
+				hservice_clr_special_wakeup(cpu);
+			}
+		}
+		return OPAL_SUCCESS;
+	default:
+		return OPAL_PARAMETER;
+	}
+}
+
+static struct host_interfaces hinterface = {
+	.interface_version = HOSTBOOT_RUNTIME_INTERFACE_VERSION,
+	.puts = hservice_puts,
+	.assert = hservice_assert,
+	.malloc = hservice_malloc,
+	.free = hservice_free,
+	.realloc = hservice_realloc,
+	.send_error_log = hservice_send_error_log,
+	.scom_read = hservice_scom_read,
+	.scom_write = hservice_scom_write,
+	.lid_load = hservice_lid_load,
+	.lid_unload = hservice_lid_unload,
+	.get_reserved_mem = hservice_get_reserved_mem,
+	.wakeup = hservice_wakeup,
+	.nanosleep = hservice_nanosleep,
+};
+
+int host_services_occ_load(void)
+{
+	struct proc_chip *chip;
+	int rc = 0;
+
+	printf("HBRT: OCC Load requested\n");
+
+	if (!(hservice_runtime && hservice_runtime->loadOCC)) {
+		prerror("HBRT: No hservice_runtime->loadOCC\n");
+		return -ENOENT;
+	}
+
+	for_each_chip(chip) {
+
+		printf("HBRT: [%16lx] Calling loadOCC() homer %016llx, occ_common_area %016llx, "
+		       "chip %04x\n",
+		       mftb(),
+		       chip->homer_base,
+		       chip->occ_common_base,
+		       chip->id);
+
+		rc = hservice_runtime->loadOCC(chip->homer_base,
+					       chip->homer_base,
+					       chip->occ_common_base,
+					       chip->occ_common_base,
+					       chip->id);
+
+		hservice_mark();
+		printf("HBRT: [%16lx] -> rc = %d\n", mftb(), rc);
+	}
+	return rc;
+}
+
+int host_services_occ_start(void)
+{
+	struct proc_chip *chip;
+	int i, rc = 0, nr_chips = 0;
+	uint64_t chipids[MAX_CHIPS];
+
+	printf("HBRT: OCC Start requested\n");
+
+	if (!(hservice_runtime && hservice_runtime->startOCCs)) {
+		prerror("HBRT: No hservice_runtime->startOCCs\n");
+		return -ENOENT;
+	}
+
+	for_each_chip(chip) {
+		chipids[nr_chips++] = chip->id;
+	}
+
+	printf("HBRT: [%16lx] Calling startOCCs() for IDs: ", mftb());
+	for (i = 0; i < nr_chips; i++)
+		printf("%04llx ", chipids[i]);
+	printf("\n");
+
+	/* Let's start all the OCCs */
+	rc = hservice_runtime->startOCCs(chipids, nr_chips);
+	hservice_mark();
+	printf("HBRT: [%16lx] -> rc = %d\n", mftb(), rc);
+	return rc;
+}
+
+void host_services_occ_base_setup(void)
+{
+	struct proc_chip *chip;
+	uint64_t occ_common;
+
+	chip = next_chip(NULL);	/* First chip */
+	occ_common = (uint64_t) local_alloc(chip->id, OCC_COMMON_SIZE, OCC_COMMON_SIZE);
+
+	for_each_chip(chip) {
+		chip->occ_common_base = occ_common;
+		chip->occ_common_size = OCC_COMMON_SIZE;
+
+		chip->homer_base = (uint64_t) local_alloc(chip->id, HOMER_IMAGE_SIZE,
+							  HOMER_IMAGE_SIZE);
+		chip->homer_size = HOMER_IMAGE_SIZE;
+		memset((void *)chip->homer_base, 0, chip->homer_size);
+
+		printf("HBRT: Chip %d HOMER base %016llx : %08llx "
+		       "OCC common base %016llx : %08llx\n",
+		       chip->id, chip->homer_base, chip->homer_size,
+		       chip->occ_common_base, chip->occ_common_size);
+	}
+}
+
+bool hservices_init(void)
+{
+	void *code = NULL;
+	struct runtime_interfaces *(*hbrt_init)(struct host_interfaces *);
+
+	struct function_descriptor {
+		void *addr;
+		void *toc;
+	} fdesc;
+
+	code = (void *)hservice_get_reserved_mem("ibm,hbrt-code-image");
+	if (!code) {
+		prerror("HBRT: No ibm,hbrt-code-image found.\n");
+		return false;
+	}
+
+	if (memcmp(code, "HBRTVERS", 8) != 0) {
+		prerror("HBRT: Bad eyecatcher for ibm,hbrt-code-image!\n");
+		return false;
+	}
+
+	printf("HBRT: Found HostBoot Runtime version %llu\n", ((u64 *)code)[1]);
+
+	/* We enter at 0x100 into the image. */
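+	/*
+	 * In the ELFv1 big-endian ABI, a function pointer points at a
+	 * function descriptor: { entry address, TOC pointer, environment }.
+	 * Forging one on the stack makes the compiler's indirect call
+	 * sequence load our chosen entry point into CTR and our dummy
+	 * TOC into r2.
+	 */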
+	fdesc.addr = code + 0x100;
+	/* It doesn't care about the TOC */
+	fdesc.toc = 0;
+
+	hbrt_init = (void *)&fdesc;
+
+	hservice_runtime = hbrt_init(&hinterface);
+	hservice_mark();
+	if (!hservice_runtime) {
+		prerror("HBRT: Host services init failed\n");
+		return false;
+	}
+
+	printf("HBRT: Interface version %llu\n",
+	       hservice_runtime->interface_version);
+
+	return true;
+}
diff --git a/core/init.c b/core/init.c
new file mode 100644
index 0000000..3d72ce5
--- /dev/null
+++ b/core/init.c
@@ -0,0 +1,687 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * 	http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <fsp.h>
+#include <fsp-sysparam.h>
+#include <psi.h>
+#include <memory.h>
+#include <chiptod.h>
+#include <nx.h>
+#include <cpu.h>
+#include <processor.h>
+#include <xscom.h>
+#include <device_tree.h>
+#include <opal.h>
+#include <opal-msg.h>
+#include <elf.h>
+#include <io.h>
+#include <cec.h>
+#include <device.h>
+#include <pci.h>
+#include <lpc.h>
+#include <chip.h>
+#include <interrupts.h>
+#include <mem_region.h>
+#include <trace.h>
+#include <console.h>
+#include <fsi-master.h>
+#include <centaur.h>
+#include <libfdt/libfdt.h>
+#include <hostservices.h>
+
+/*
+ * Boot semaphore, incremented by each CPU calling in
+ *
+ * Forced into data section as it will be used before BSS is initialized
+ */
+enum ipl_state ipl_state = ipl_initial;
+enum proc_gen proc_gen;
+
+static uint64_t kernel_entry;
+static bool kernel_32bit;
+static void *fdt;
+
+struct debug_descriptor debug_descriptor = {
+	.eye_catcher	= "OPALdbug",
+	.version	= DEBUG_DESC_VERSION,
+	.memcons_phys	= (uint64_t)&memcons,
+	.trace_mask	= 0, /* All traces disabled by default */
+};
+
+static bool try_load_elf64_le(struct elf_hdr *header)
+{
+	struct elf64_hdr *kh = (struct elf64_hdr *)header;
+	uint64_t load_base = (uint64_t)kh;
+	struct elf64_phdr *ph;
+	unsigned int i;
+
+	printf("INIT: 64-bit LE kernel discovered\n");
+
+	/* Look for a loadable program header that has our entry in it
+	 *
+	 * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF
+	 * but it will not work for arbitrary ELF binaries.
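+	 *
+	 * For example (made-up numbers): e_entry = 0x1000c with a
+	 * PT_LOAD segment at p_vaddr = 0x10000 / p_offset = 0 yields
+	 * kernel_entry = 0xc, an offset inside the in-place image that
+	 * gets rebased onto load_base further down.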
+	 */
+	ph = (struct elf64_phdr *)(load_base + le64_to_cpu(kh->e_phoff));
+	for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
+		if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+			continue;
+		if (le64_to_cpu(ph->p_vaddr) > le64_to_cpu(kh->e_entry) ||
+		    (le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_memsz)) <
+		    le64_to_cpu(kh->e_entry))
+			continue;
+
+		/* Get our entry */
+		kernel_entry = le64_to_cpu(kh->e_entry) -
+			le64_to_cpu(ph->p_vaddr) + le64_to_cpu(ph->p_offset);
+		break;
+	}
+
+	if (!kernel_entry) {
+		prerror("INIT: Failed to find kernel entry !\n");
+		return false;
+	}
+	kernel_entry += load_base;
+	kernel_32bit = false;
+
+	printf("INIT: 64-bit kernel entry at 0x%llx\n", kernel_entry);
+
+	return true;
+}
+
+static bool try_load_elf64(struct elf_hdr *header)
+{
+	struct elf64_hdr *kh = (struct elf64_hdr *)header;
+	uint64_t load_base = (uint64_t)kh;
+	struct elf64_phdr *ph;
+	unsigned int i;
+
+	/* Check it's a ppc64 LE ELF */
+	if (kh->ei_ident == ELF_IDENT &&
+	    kh->ei_data == ELF_DATA_LSB &&
+	    kh->e_machine == le16_to_cpu(ELF_MACH_PPC64)) {
+		return try_load_elf64_le(header);
+	}
+
+	/* Check it's a ppc64 ELF */
+	if (kh->ei_ident != ELF_IDENT ||
+	    kh->ei_data != ELF_DATA_MSB ||
+	    kh->e_machine != ELF_MACH_PPC64) {
+		prerror("INIT: Kernel doesn't look like a ppc64 ELF\n");
+		return false;
+	}
+
+	/* Look for a loadable program header that has our entry in it
+	 *
+	 * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF
+	 * but it will not work for arbitrary ELF binaries.
+	 */
+	ph = (struct elf64_phdr *)(load_base + kh->e_phoff);
+	for (i = 0; i < kh->e_phnum; i++, ph++) {
+		if (ph->p_type != ELF_PTYPE_LOAD)
+			continue;
+		if (ph->p_vaddr > kh->e_entry ||
+		    (ph->p_vaddr + ph->p_memsz) < kh->e_entry)
+			continue;
+
+		/* Get our entry */
+		kernel_entry = kh->e_entry - ph->p_vaddr + ph->p_offset;
+		break;
+	}
+
+	if (!kernel_entry) {
+		prerror("INIT: Failed to find kernel entry !\n");
+		return false;
+	}
+	kernel_entry += load_base;
+	kernel_32bit = false;
+
+	printf("INIT: 64-bit kernel entry at 0x%llx\n", kernel_entry);
+
+	return true;
+}
+
+static bool try_load_elf32_le(struct elf_hdr *header)
+{
+	struct elf32_hdr *kh = (struct elf32_hdr *)header;
+	uint64_t load_base = (uint64_t)kh;
+	struct elf32_phdr *ph;
+	unsigned int i;
+
+	printf("INIT: 32-bit LE kernel discovered\n");
+
+	/* Look for a loadable program header that has our entry in it
+	 *
+	 * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF
+	 * but it will not work for arbitrary ELF binaries.
+	 */
+	ph = (struct elf32_phdr *)(load_base + le32_to_cpu(kh->e_phoff));
+	for (i = 0; i < le16_to_cpu(kh->e_phnum); i++, ph++) {
+		if (le32_to_cpu(ph->p_type) != ELF_PTYPE_LOAD)
+			continue;
+		if (le32_to_cpu(ph->p_vaddr) > le32_to_cpu(kh->e_entry) ||
+		    (le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_memsz)) <
+		    le32_to_cpu(kh->e_entry))
+			continue;
+
+		/* Get our entry */
+		kernel_entry = le32_to_cpu(kh->e_entry) -
+			le32_to_cpu(ph->p_vaddr) + le32_to_cpu(ph->p_offset);
+		break;
+	}
+
+	if (!kernel_entry) {
+		prerror("INIT: Failed to find kernel entry !\n");
+		return false;
+	}
+
+	kernel_entry += load_base;
+	kernel_32bit = true;
+
+	printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
+
+	return true;
+}
+
+static bool try_load_elf32(struct elf_hdr *header)
+{
+	struct elf32_hdr *kh = (struct elf32_hdr *)header;
+	uint64_t load_base = (uint64_t)kh;
+	struct elf32_phdr *ph;
+	unsigned int i;
+
+	/* Check it's a ppc32 LE ELF */
+	if (header->ei_ident == ELF_IDENT &&
+	    header->ei_data == ELF_DATA_LSB &&
+	    header->e_machine == le16_to_cpu(ELF_MACH_PPC32)) {
+		return try_load_elf32_le(header);
+	}
+
+	/* Check it's a ppc32 ELF */
+	if (header->ei_ident != ELF_IDENT ||
+	    header->ei_data != ELF_DATA_MSB ||
+	    header->e_machine != ELF_MACH_PPC32) {
+		prerror("INIT: Kernel doesn't look like a ppc32 ELF\n");
+		return false;
+	}
+
+	/* Look for a loadable program header that has our entry in it
+	 *
+	 * Note that we execute the kernel in-place, we don't actually
+	 * obey the load information in the headers. This is expected
+	 * to work for the Linux kernel because it's a fairly dumb ELF
+	 * but it will not work for arbitrary ELF binaries.
+	 */
+	ph = (struct elf32_phdr *)(load_base + kh->e_phoff);
+	for (i = 0; i < kh->e_phnum; i++, ph++) {
+		if (ph->p_type != ELF_PTYPE_LOAD)
+			continue;
+		if (ph->p_vaddr > kh->e_entry ||
+		    (ph->p_vaddr + ph->p_memsz) < kh->e_entry)
+			continue;
+
+		/* Get our entry */
+		kernel_entry = kh->e_entry - ph->p_vaddr + ph->p_offset;
+		break;
+	}
+
+	if (!kernel_entry) {
+		prerror("INIT: Failed to find kernel entry !\n");
+		return false;
+	}
+
+	kernel_entry += load_base;
+	kernel_32bit = true;
+
+	printf("INIT: 32-bit kernel entry at 0x%llx\n", kernel_entry);
+
+	return true;
+}
+
+/* LID numbers.
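+ * (A LID is a firmware image blob served up by the FSP, identified by
+ * a 32-bit number; fsp_adjust_lid_side() below selects the copy that
+ * belongs to the currently active firmware side.)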
+ * For now we hijack some of pHyp's own until I figure
+ * out the whole business with the MasterLID
+ */
+#define KERNEL_LID_PHYP	0x80a00701
+#define KERNEL_LID_OPAL	0x80f00101
+
+extern char __builtin_kernel_start[];
+extern char __builtin_kernel_end[];
+extern uint64_t boot_offset;
+
+static bool load_kernel(void)
+{
+	struct elf_hdr *kh;
+	uint32_t lid;
+	size_t ksize;
+	const char *ltype;
+
+	ltype = dt_prop_get_def(dt_root, "lid-type", NULL);
+
+	/* No lid-type, assume stradale, currently pre-loaded at fixed
+	 * address
+	 */
+	if (!ltype) {
+		printf("No lid-type property, assuming FSP-less setup\n");
+		ksize = __builtin_kernel_end - __builtin_kernel_start;
+		if (ksize) {
+			/* Move the built-in kernel up */
+			uint64_t builtin_base =
+				((uint64_t)__builtin_kernel_start) -
+				SKIBOOT_BASE + boot_offset;
+			printf("Using built-in kernel\n");
+			memmove(KERNEL_LOAD_BASE, (void*)builtin_base, ksize);
+		} else
+			printf("Assuming kernel at 0x%p\n", KERNEL_LOAD_BASE);
+	} else {
+		ksize = KERNEL_LOAD_SIZE;
+
+		/* First try to load an OPAL secondary LID always */
+		lid = fsp_adjust_lid_side(KERNEL_LID_OPAL);
+		printf("Trying to load OPAL secondary LID...\n");
+		if (fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid, 0,
+				   KERNEL_LOAD_BASE, &ksize) != 0) {
+			if (!strcmp(ltype, "opal")) {
+				prerror("Failed to load in OPAL mode...\n");
+				return false;
+			}
+			printf("Trying to load as PHYP LID...\n");
+			lid = fsp_adjust_lid_side(KERNEL_LID_PHYP);
+			ksize = KERNEL_LOAD_SIZE;
+			if (fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid, 0,
+					   KERNEL_LOAD_BASE, &ksize) != 0) {
+				prerror("Failed to load kernel\n");
+				return false;
+			}
+		}
+	}
+
+	printf("INIT: Kernel loaded, size: %zu bytes (0 = unknown preload)\n",
+	       ksize);
+
+	kh = (struct elf_hdr *)KERNEL_LOAD_BASE;
+	if (kh->ei_class == ELF_CLASS_64)
+		return try_load_elf64(kh);
+	else if (kh->ei_class == ELF_CLASS_32)
+		return try_load_elf32(kh);
+
+	printf("INIT: Neither ELF32 nor ELF64 ?\n");
+	return false;
+}
+
+void __noreturn load_and_boot_kernel(bool is_reboot)
+{
+	const struct dt_property *memprop;
+	uint64_t mem_top;
+
+	memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
+	if (memprop)
+		mem_top = (u64)dt_property_get_cell(memprop, 0) << 32
+			| dt_property_get_cell(memprop, 1);
+	else /* XXX HB hack, might want to calc it */
+		mem_top = 0x40000000;
+
+	op_display(OP_LOG, OP_MOD_INIT, 0x000A);
+
+	/* Load kernel LID */
+	if (!load_kernel()) {
+		op_display(OP_FATAL, OP_MOD_INIT, 1);
+		abort();
+	}
+
+	if (!is_reboot) {
+		/* We wait for the nvram read to complete here so we can
+		 * grab stuff from there such as the kernel arguments
+		 */
+		fsp_nvram_wait_open();
+
+		/* Wait for FW VPD data read to complete */
+		fsp_code_update_wait_vpd(true);
+	}
+	fsp_console_select_stdout();
+
+	/*
+	 * OCC takes a few seconds to boot. Call this as late as
+	 * possible to avoid delay.
+	 */
+	occ_pstates_init();
+
+	/* Set kernel command line argument if specified */
+#ifdef KERNEL_COMMAND_LINE
+	dt_add_property_string(dt_chosen, "bootargs", KERNEL_COMMAND_LINE);
+#endif
+
+	op_display(OP_LOG, OP_MOD_INIT, 0x000B);
+
+	/* Create the device tree blob to boot the OS with */
+	fdt = create_dtb(dt_root);
+	if (!fdt) {
+		op_display(OP_FATAL, OP_MOD_INIT, 2);
+		abort();
+	}
+
+	op_display(OP_LOG, OP_MOD_INIT, 0x000C);
+
+	/* Start the kernel */
+	if (!is_reboot)
+		op_panel_disable_src_echo();
+
+	/* Clear SRCs on the op-panel when Linux starts */
+	op_panel_clear_src();
+
+	cpu_give_self_os();
+
+	printf("INIT: Starting kernel at 0x%llx, fdt at %p (size 0x%x)\n",
+	       kernel_entry, fdt, fdt_totalsize(fdt));
+
+	fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+	if (kernel_32bit)
+		start_kernel32(kernel_entry, fdt, mem_top);
+	start_kernel(kernel_entry, fdt, mem_top);
+}
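+
+/*
+ * start_kernel() and start_kernel32() live in head.S. As far as the
+ * entry convention goes, the OS is handed the flattened device-tree
+ * pointer in r3, with the OPAL base and entry addresses advertised
+ * via the ibm,opal device-tree node.
+ */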
+void __noreturn load_and_boot_kernel(bool is_reboot)
+{
+	const struct dt_property *memprop;
+	uint64_t mem_top;
+
+	memprop = dt_find_property(dt_root, DT_PRIVATE "maxmem");
+	if (memprop)
+		mem_top = (u64)dt_property_get_cell(memprop, 0) << 32
+			| dt_property_get_cell(memprop, 1);
+	else /* XXX HB hack, might want to calc it */
+		mem_top = 0x40000000;
+
+	op_display(OP_LOG, OP_MOD_INIT, 0x000A);
+
+	/* Load kernel LID */
+	if (!load_kernel()) {
+		op_display(OP_FATAL, OP_MOD_INIT, 1);
+		abort();
+	}
+
+	if (!is_reboot) {
+		/* We wait for the nvram read to complete here so we can
+		 * grab stuff from there such as the kernel arguments
+		 */
+		fsp_nvram_wait_open();
+
+		/* Wait for FW VPD data read to complete */
+		fsp_code_update_wait_vpd(true);
+	}
+	fsp_console_select_stdout();
+
+	/*
+	 * OCC takes a few seconds to boot. Call this as late as
+	 * possible to avoid delay.
+	 */
+	occ_pstates_init();
+
+	/* Set kernel command line argument if specified */
+#ifdef KERNEL_COMMAND_LINE
+	dt_add_property_string(dt_chosen, "bootargs", KERNEL_COMMAND_LINE);
+#endif
+
+	op_display(OP_LOG, OP_MOD_INIT, 0x000B);
+
+	/* Create the device tree blob to boot the OS. */
+	fdt = create_dtb(dt_root);
+	if (!fdt) {
+		op_display(OP_FATAL, OP_MOD_INIT, 2);
+		abort();
+	}
+
+	op_display(OP_LOG, OP_MOD_INIT, 0x000C);
+
+	/* Start the kernel */
+	if (!is_reboot)
+		op_panel_disable_src_echo();
+
+	/* Clear SRCs on the op-panel when Linux starts */
+	op_panel_clear_src();
+
+	cpu_give_self_os();
+
+	printf("INIT: Starting kernel at 0x%llx, fdt at %p (size 0x%x)\n",
+	       kernel_entry, fdt, fdt_totalsize(fdt));
+
+	fdt_set_boot_cpuid_phys(fdt, this_cpu()->pir);
+	if (kernel_32bit)
+		start_kernel32(kernel_entry, fdt, mem_top);
+	start_kernel(kernel_entry, fdt, mem_top);
+}
+
+static void dt_fixups(void)
+{
+	struct dt_node *n;
+	struct dt_node *primary_lpc = NULL;
+
+	/* lpc node missing #address/size cells. Also pick one as
+	 * primary for now (TBD: How to convey that from HB)
+	 */
+	dt_for_each_compatible(dt_root, n, "ibm,power8-lpc") {
+		if (!primary_lpc || dt_has_node_property(n, "primary", NULL))
+			primary_lpc = n;
+		if (dt_has_node_property(n, "#address-cells", NULL))
+			break;
+		dt_add_property_cells(n, "#address-cells", 2);
+		dt_add_property_cells(n, "#size-cells", 1);
+		dt_add_property_strings(n, "status", "ok");
+	}
+
+	/* Missing "primary" property in LPC bus */
+	if (primary_lpc && !dt_has_node_property(primary_lpc, "primary", NULL))
+		dt_add_property(primary_lpc, "primary", NULL, 0);
+
+	/* Missing "scom-controller" */
+	dt_for_each_compatible(dt_root, n, "ibm,xscom") {
+		if (!dt_has_node_property(n, "scom-controller", NULL))
+			dt_add_property(n, "scom-controller", NULL, 0);
+	}
+}
+
+static void add_arch_vector(void)
+{
+	/**
+	 * vec5 = a PVR-list : Number-of-option-vectors :
+	 *	  option-vectors[Number-of-option-vectors + 1]
+	 */
+	uint8_t vec5[] = {0x05, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00};
+
+	if (dt_has_node_property(dt_chosen, "ibm,architecture-vec-5", NULL))
+		return;
+
+	dt_add_property(dt_chosen, "ibm,architecture-vec-5",
+			vec5, sizeof(vec5));
+}
+
+static void dt_init_misc(void)
+{
+	/* Check if there's a /chosen node, if not, add one */
+	dt_chosen = dt_find_by_path(dt_root, "/chosen");
+	if (!dt_chosen)
+		dt_chosen = dt_new(dt_root, "chosen");
+	assert(dt_chosen);
+
+	/* Add IBM architecture vectors if needed */
+	add_arch_vector();
+
+	/* Add the "OPAL virtual ICS" node */
+	add_ics_node();
+
+	/* Additional fixups. TODO: Move into platform */
+	dt_fixups();
+}
+
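dt_fixups() above patches device-tree nodes only when a property is absent.
The same idempotent pattern, sketched for a hypothetical node pointer using
the dt_* helpers already seen in this file:

    /* Sketch: add default cell sizes to a node only if missing.
     * "node" is a hypothetical struct dt_node pointer. */
    static void fixup_cells(struct dt_node *node)
    {
        if (!dt_has_node_property(node, "#address-cells", NULL))
            dt_add_property_cells(node, "#address-cells", 2);
        if (!dt_has_node_property(node, "#size-cells", NULL))
            dt_add_property_cells(node, "#size-cells", 1);
    }
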
+/* Called from head.S, thus no prototype. */
+void main_cpu_entry(const void *fdt, u32 master_cpu);
+
+void __noreturn main_cpu_entry(const void *fdt, u32 master_cpu)
+{
+	/*
+	 * WARNING: At this point, the timebases have
+	 * *not* been synchronized yet. Do not use any timebase
+	 * related functions for timeouts etc... unless you can cope
+	 * with the speed being some random core clock divider and
+	 * the value jumping backward when the synchronization actually
+	 * happens (in chiptod_init() below).
+	 *
+	 * Also the current cpu_thread() struct is not initialized
+	 * either so we need to clear it out first thing (without
+	 * putting any other useful info in there just yet), otherwise
+	 * printf and locks are going to play funny games with "con_suspend"
+	 */
+	pre_init_boot_cpu();
+
+	/*
+	 * Before first printk, ensure console buffer is clear or
+	 * reading tools might think it has wrapped
+	 */
+	clear_console();
+
+	printf("SkiBoot %s starting...\n", gitid);
+
+	/* Initialize boot cpu's cpu_thread struct */
+	init_boot_cpu();
+
+	/* Now locks can be used */
+	init_locks();
+
+	/* Create the OPAL call table early on, entries can be overridden
+	 * later on (FSP console code for example)
+	 */
+	opal_table_init();
+
+	/*
+	 * If we are coming in with a flat device-tree, we expand it
+	 * now. Else look for HDAT and create a device-tree from them
+	 *
+	 * Hack alert: When entering via the OPAL entry point, fdt
+	 * is set to -1, we record that and pass it to parse_hdat
+	 */
+	if (fdt == (void *)-1ul)
+		parse_hdat(true, master_cpu);
+	else if (fdt == NULL)
+		parse_hdat(false, master_cpu);
+	else {
+		dt_expand(fdt);
+	}
+
+	/*
+	 * From there, we follow a fairly strict initialization order.
+	 *
+	 * First we need to build up our chip data structures and initialize
+	 * XSCOM which will be needed for a number of subsequent things.
+	 *
+	 * We want XSCOM available as early as the platform probe in case the
+	 * probe requires some HW accesses.
+	 *
+	 * We also initialize the FSI master at that point in case we need
+	 * to access chips via that path early on.
+	 */
+	init_chips();
+	xscom_init();
+	mfsi_init();
+
+	/*
+	 * Put various bits & pieces in device-tree that might not
+	 * already be there such as the /chosen node if not there yet,
+	 * the ICS node, etc... This can potentially use XSCOM
+	 */
+	dt_init_misc();
+
+	/*
+	 * Initialize LPC (P8 only) so we can get to UART, BMC and
+	 * other system controllers. This is done before probe_platform
+	 * so that the platform probing code can access an external
+	 * BMC if needed.
+	 */
+	lpc_init();
+
+	/*
+	 * Now, we init our memory map from the device-tree, and immediately
+	 * reserve areas which we know might contain data coming from
+	 * HostBoot. We need to do these things before we start doing
+	 * allocations outside of our heap, such as chip local allocs,
+	 * otherwise we might clobber those data.
+	 */
+	mem_region_init();
+
+	/* Reserve HOMER and OCC area */
+	homer_init();
+
+	/* Initialize host services. */
+	hservices_init();
+
+	/*
+	 * We probe the platform now. This means the platform probe gets
+	 * the opportunity to reserve additional areas of memory if needed.
+	 *
+	 * Note: Timebases still not synchronized.
+	 */
+	probe_platform();
+
+	/* Initialize the rest of the cpu thread structs */
+	init_all_cpus();
+
+	/* Add the /opal node to the device-tree */
+	add_opal_node();
+
+	/* Allocate our split trace buffers now. Depends on add_opal_node() */
+	init_trace_buffers();
+
+	/* Get the ICPs and make sure they are in a sane state */
+	init_interrupts();
+
+	/* Grab centaurs from device-tree if present (only on FSP-less) */
+	centaur_init();
+
+	/* Initialize PSI (depends on probe_platform being called) */
+	psi_init();
+
+	/* Call in secondary CPUs */
+	cpu_bringup();
+
+	/*
+	 * Synchronize time bases. This resets all the TB values to a small
+	 * value (so they appear to go backward at this point), and synchronizes
+	 * all core timebases to the global ChipTOD network
+	 */
+	chiptod_init(master_cpu);
+
+	/*
+	 * We have initialized the basic HW, we can now call into the
+	 * platform to perform subsequent inits, such as establishing
+	 * communication with the FSP.
+	 */
+	if (platform.init)
+		platform.init();
+
+	/* Init SLW related stuff, including fastsleep */
+	slw_init();
+
+	op_display(OP_LOG, OP_MOD_INIT, 0x0002);
+
+	/* Read in NVRAM and set it up */
+	nvram_init();
+
+	/* NX init */
+	nx_init();
+
+	/* Initialize the opal messaging */
+	opal_init_msg();
+
+	/* Probe IO hubs */
+	probe_p5ioc2();
+	probe_p7ioc();
+
+	/* Probe PHB3 on P8 */
+	probe_phb3();
+
+	/* Initialize PCI */
+	pci_init_slots();
+
+	/*
+	 * These last few things must be done as late as possible
+	 * because they rely on various other things having been setup,
+	 * for example, add_opal_interrupts() will add all the interrupt
+	 * sources that are going to the firmware. We can't add a new one
+	 * after that call. Similarly, the mem_region calls will construct
+	 * the reserve maps in the DT so we shouldn't affect the memory
+	 * regions after that
+	 */
+
+	/* Add the list of interrupts going to OPAL */
+	add_opal_interrupts();
+
+	/* Now release parts of memory nodes we haven't used ourselves... */
+	mem_region_release_unused();
+
+	/* ... and add remaining reservations to the DT */
+	mem_region_add_dt_reserved();
+
+	load_and_boot_kernel(false);
+}
+
+void __noreturn __secondary_cpu_entry(void)
+{
+	struct cpu_thread *cpu = this_cpu();
+
+	/* Secondary CPU called in */
+	cpu_callin(cpu);
+
+	/* Wait for work to do */
+	while(true) {
+		int i;
+
+		/* Process pending jobs on this processor */
+		cpu_process_jobs();
+
+		/* Relax a bit to give the simulator some breathing space */
+		i = 1000;
+		while (--i)
+			smt_very_low();
+		smt_low();
+	}
+}
+
+/* Called from head.S, thus no prototype. */
+void secondary_cpu_entry(void);
+
+void __noreturn secondary_cpu_entry(void)
+{
+	struct cpu_thread *cpu = this_cpu();
+
+	printf("INIT: CPU PIR 0x%04x called in\n", cpu->pir);
+
+	__secondary_cpu_entry();
+}
+
diff --git a/core/interrupts.c b/core/interrupts.c
new file mode 100644
index 0000000..cabebc2
--- /dev/null
+++ b/core/interrupts.c
@@ -0,0 +1,332 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <skiboot.h>
+#include <cpu.h>
+#include <fsp.h>
+#include <interrupts.h>
+#include <opal.h>
+#include <io.h>
+#include <cec.h>
+#include <device.h>
+#include <ccan/str/str.h>
+
+/* ICP registers */
+#define ICP_XIRR		0x4	/* 32-bit access */
+#define ICP_CPPR		0x4	/* 8-bit access */
+#define ICP_MFRR		0xc	/* 8-bit access */
+
+struct irq_source {
+	uint32_t			start;
+	uint32_t			end;
+	const struct irq_source_ops	*ops;
+	void				*data;
+	struct list_node		link;
+};
+
+static LIST_HEAD(irq_sources);
+static struct lock irq_lock = LOCK_UNLOCKED;
+
+void register_irq_source(const struct irq_source_ops *ops, void *data,
+			 uint32_t start, uint32_t count)
+{
+	struct irq_source *is, *is1;
+
+	is = zalloc(sizeof(struct irq_source));
+	assert(is);
+	is->start = start;
+	is->end = start + count;
+	is->ops = ops;
+	is->data = data;
+
+	printf("IRQ: Registering %04x..%04x ops @%p (data %p) %s\n",
+	       start, start + count - 1, ops, data,
+	       ops->interrupt ? "[Internal]" : "[OS]");
+
+	lock(&irq_lock);
+	list_for_each(&irq_sources, is1, link) {
+		if (is->end > is1->start && is->start < is1->end) {
+			prerror("register IRQ source overlap !\n");
+			prerror("  new: %x..%x old: %x..%x\n",
+				is->start, is->end - 1,
+				is1->start, is1->end - 1);
+			assert(0);
+		}
+	}
+	list_add_tail(&irq_sources, &is->link);
+	unlock(&irq_lock);
+}
+
+void unregister_irq_source(uint32_t start, uint32_t count)
+{
+	struct irq_source *is;
+
+	lock(&irq_lock);
+	list_for_each(&irq_sources, is, link) {
+		if (start >= is->start && start < is->end) {
+			if (start != is->start ||
+			    count != (is->end - is->start)) {
+				prerror("unregister IRQ source mismatch !\n");
+				prerror("start:%x, count: %x match: %x..%x\n",
+					start, count, is->start, is->end);
+				assert(0);
+			}
+			list_del(&is->link);
+			unlock(&irq_lock);
+			/* XXX Add synchronize / RCU */
+			free(is);
+			return;
+		}
+	}
+	unlock(&irq_lock);
+	prerror("unregister IRQ source not found !\n");
+	prerror("start:%x, count: %x\n", start, count);
+	assert(0);
+}
+
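A hedged usage sketch for the registration API above. The source range and
the contents of the ops structure are hypothetical; the .interrupt signature
is inferred from the is->ops->interrupt(is->data, isn) call later in this
file:

    /* Claim 16 interrupt sources starting at a made-up number 0x400,
     * handled internally by firmware (so they also end up listed in
     * the "opal-interrupts" property). */
    static void my_handler(void *data, uint32_t isn)
    {
        /* ... acknowledge and process interrupt "isn" ... */
    }

    static const struct irq_source_ops my_ops = {
        .interrupt = my_handler,
    };

    static void my_probe(void)
    {
        register_irq_source(&my_ops, NULL, 0x400, 16);
    }
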
+/*
+ * This takes a 6-bit chip id and returns a 20-bit value representing
+ * the PSI interrupt. This includes all the fields above, i.e., it is a
+ * global interrupt number.
+ *
+ * For P8, this returns the base of the 8-interrupt block for PSI
+ */
+uint32_t get_psi_interrupt(uint32_t chip_id)
+{
+	uint32_t irq;
+
+	switch(proc_gen) {
+	case proc_gen_p7:
+		/* Get the chip ID into position, it already has
+		 * the T bit so all we need is room for the GX
+		 * bit, 9 bit BUID and 4 bit level
+		 */
+		irq = chip_id << (1 + 9 + 4);
+
+		/* Add in the BUID */
+		irq |= P7_PSI_IRQ_BUID << 4;
+		break;
+	case proc_gen_p8:
+		irq = P8_CHIP_IRQ_BLOCK_BASE(chip_id, P8_IRQ_BLOCK_MISC);
+		irq += P8_IRQ_MISC_PSI_BASE;
+		break;
+	default:
+		assert(false);
+	};
+
+	return irq;
+}
+
+
+struct dt_node *add_ics_node(void)
+{
+	struct dt_node *ics = dt_new_addr(dt_root, "interrupt-controller", 0);
+	if (!ics)
+		return NULL;
+
+	dt_add_property_cells(ics, "reg", 0, 0, 0, 0);
+	dt_add_property_strings(ics, "compatible", "IBM,ppc-xics",
+				"IBM,opal-xics");
+	dt_add_property_cells(ics, "#address-cells", 0);
+	dt_add_property_cells(ics, "#interrupt-cells", 1);
+	dt_add_property_string(ics, "device_type",
+			       "PowerPC-Interrupt-Source-Controller");
+	dt_add_property(ics, "interrupt-controller", NULL, 0);
+
+	return ics;
+}
+
+uint32_t get_ics_phandle(void)
+{
+	struct dt_node *i;
+
+	for (i = dt_first(dt_root); i; i = dt_next(dt_root, i)) {
+		if (streq(i->name, "interrupt-controller@0")) {
+			return i->phandle;
+		}
+	}
+	abort();
+}
+
+void add_opal_interrupts(void)
+{
+	struct irq_source *is;
+	unsigned int i, count = 0;
+	uint32_t *irqs = NULL, isn;
+
+	lock(&irq_lock);
+	list_for_each(&irq_sources, is, link) {
+		/*
+		 * Add a source to opal-interrupts if it has an
+		 * ->interrupt callback
+		 */
+		if (!is->ops->interrupt)
+			continue;
+		for (isn = is->start; isn < is->end; isn++) {
+			i = count++;
+			irqs = realloc(irqs, 4 * count);
+			irqs[i] = isn;
+		}
+	}
+	unlock(&irq_lock);
+
+	/* The opal-interrupts property has one cell per interrupt,
+	 * it is not a standard interrupt property
+	 */
+	if (irqs)
+		dt_add_property(opal_node, "opal-interrupts", irqs, count * 4);
+}
+
+/*
+ * This is called at init time (and on fast reboot) to sanitize the
+ * ICP. We set our priority to 0 to mask all interrupts and make sure
+ * no IPI is on the way.
+ */
+void reset_cpu_icp(void)
+{
+	void *icp = this_cpu()->icp_regs;
+
+	assert(icp);
+
+	/* Clear pending IPIs */
+	out_8(icp + ICP_MFRR, 0xff);
+
+	/* Set priority to max, ignore all incoming interrupts, EOI IPIs */
+	out_be32(icp + ICP_XIRR, 2);
+}
+
+/* Used by the PSI code to send an EOI during reset.
This will also + * set the CPPR to 0 which should already be the case anyway + */ +void icp_send_eoi(uint32_t interrupt) +{ + void *icp = this_cpu()->icp_regs; + + assert(icp); + + /* Set priority to max, ignore all incoming interrupts */ + out_be32(icp + ICP_XIRR, interrupt & 0xffffff); +} + +/* This is called before winkle, we clear pending IPIs and set our priority + * to 1 to mask all but the IPI + */ +void icp_prep_for_rvwinkle(void) +{ + void *icp = this_cpu()->icp_regs; + + assert(icp); + + /* Clear pending IPIs */ + out_8(icp + ICP_MFRR, 0xff); + + /* Set priority to 1, ignore all incoming interrupts, EOI IPIs */ + out_be32(icp + ICP_XIRR, 0x01000002); +} + +/* This is called to wakeup somebody from winkle */ +void icp_kick_cpu(struct cpu_thread *cpu) +{ + void *icp = cpu->icp_regs; + + assert(icp); + + /* Send high priority IPI */ + out_8(icp + ICP_MFRR, 0); +} + +static struct irq_source *irq_find_source(uint32_t isn) +{ + struct irq_source *is; + + lock(&irq_lock); + list_for_each(&irq_sources, is, link) { + if (isn >= is->start && isn < is->end) { + unlock(&irq_lock); + return is; + } + } + unlock(&irq_lock); + + return NULL; +} + +static int64_t opal_set_xive(uint32_t isn, uint16_t server, uint8_t priority) +{ + struct irq_source *is = irq_find_source(isn); + + if (!is || !is->ops->set_xive) + return OPAL_PARAMETER; + + return is->ops->set_xive(is->data, isn, server, priority); +} +opal_call(OPAL_SET_XIVE, opal_set_xive, 3); + +static int64_t opal_get_xive(uint32_t isn, uint16_t *server, uint8_t *priority) +{ + struct irq_source *is = irq_find_source(isn); + + if (!is || !is->ops->get_xive) + return OPAL_PARAMETER; + + return is->ops->get_xive(is->data, isn, server, priority); +} +opal_call(OPAL_GET_XIVE, opal_get_xive, 3); + +static int64_t opal_handle_interrupt(uint32_t isn, uint64_t *outstanding_event_mask) +{ + struct irq_source *is = irq_find_source(isn); + int64_t rc = OPAL_SUCCESS; + + if (!is || !is->ops->interrupt) { + rc = OPAL_PARAMETER; + goto bail; + } + + is->ops->interrupt(is->data, isn); + + /* Update output events */ + bail: + if (outstanding_event_mask) + *outstanding_event_mask = opal_pending_events; + + return rc; +} +opal_call(OPAL_HANDLE_INTERRUPT, opal_handle_interrupt, 2); + +void init_interrupts(void) +{ + struct dt_node *icp; + const struct dt_property *sranges; + struct cpu_thread *cpu; + u32 base, count, i; + u64 addr, size; + + dt_for_each_compatible(dt_root, icp, "ibm,ppc-xicp") { + sranges = dt_require_property(icp, + "ibm,interrupt-server-ranges", + -1); + base = dt_get_number(sranges->prop, 1); + count = dt_get_number(sranges->prop + 4, 1); + for (i = 0; i < count; i++) { + addr = dt_get_address(icp, i, &size); + cpu = find_cpu_by_server(base + i); + if (cpu) + cpu->icp_regs = (void *)addr; + } + } +} + diff --git a/core/lock.c b/core/lock.c new file mode 100644 index 0000000..fc4bf6b --- /dev/null +++ b/core/lock.c @@ -0,0 +1,125 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <skiboot.h> +#include <lock.h> +#include <assert.h> +#include <processor.h> +#include <cpu.h> +#include <console.h> + +/* Set to bust locks. Note, this is initialized to true because our + * lock debugging code is not going to work until we have the per + * CPU data initialized + */ +bool bust_locks = true; + +#ifdef DEBUG_LOCKS + +static void lock_error(struct lock *l, const char *reason, uint16_t err) +{ + op_display(OP_FATAL, OP_MOD_LOCK, err); + + fprintf(stderr, "LOCK ERROR: %s @%p (state: 0x%016lx)\n", + reason, l, l->lock_val); + abort(); +} + +static void lock_check(struct lock *l) +{ + if ((l->lock_val & 1) && (l->lock_val >> 32) == this_cpu()->pir) + lock_error(l, "Invalid recursive lock", 0); +} + +static void unlock_check(struct lock *l) +{ + if (!(l->lock_val & 1)) + lock_error(l, "Unlocking unlocked lock", 1); + + if ((l->lock_val >> 32) != this_cpu()->pir) + lock_error(l, "Unlocked non-owned lock", 2); + + if (l->in_con_path && this_cpu()->con_suspend == 0) + lock_error(l, "Unlock con lock with console not suspended", 3); +} + +#else +static inline void lock_check(struct lock *l) { }; +static inline void unlock_check(struct lock *l) { }; +#endif /* DEBUG_LOCKS */ + + +bool try_lock(struct lock *l) +{ + if (__try_lock(l)) { + if (l->in_con_path) + this_cpu()->con_suspend++; + return true; + } + return false; +} + +void lock(struct lock *l) +{ + if (bust_locks) + return; + + lock_check(l); + for (;;) { + if (try_lock(l)) + break; + smt_low(); + } + smt_medium(); +} + +void unlock(struct lock *l) +{ + struct cpu_thread *cpu = this_cpu(); + + if (bust_locks) + return; + + unlock_check(l); + + lwsync(); + l->lock_val = 0; + + if (l->in_con_path) { + cpu->con_suspend--; + if (cpu->con_suspend == 0 && cpu->con_need_flush) + flush_console(); + } +} + +bool lock_recursive(struct lock *l) +{ + if (bust_locks) + return false; + + if ((l->lock_val & 1) && + (l->lock_val >> 32) == this_cpu()->pir) + return false; + + lock(l); + return true; +} + + +void init_locks(void) +{ + bust_locks = false; +} diff --git a/core/malloc.c b/core/malloc.c new file mode 100644 index 0000000..692a501 --- /dev/null +++ b/core/malloc.c @@ -0,0 +1,84 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Wrappers for malloc, et. al. 
*/ +#include <mem_region.h> +#include <lock.h> +#include <string.h> +#include <mem_region-malloc.h> + +#define DEFAULT_ALIGN __alignof__(long) + +void *__memalign(size_t blocksize, size_t bytes, const char *location) +{ + void *p; + + lock(&mem_region_lock); + p = mem_alloc(&skiboot_heap, bytes, blocksize, location); + unlock(&mem_region_lock); + + return p; +} + +void *__malloc(size_t bytes, const char *location) +{ + return __memalign(DEFAULT_ALIGN, bytes, location); +} + +void __free(void *p, const char *location) +{ + lock(&mem_region_lock); + mem_free(&skiboot_heap, p, location); + unlock(&mem_region_lock); +} + +void *__realloc(void *ptr, size_t size, const char *location) +{ + void *newptr; + + /* Two classic malloc corner cases. */ + if (!size) { + __free(ptr, location); + return NULL; + } + if (!ptr) + return __malloc(size, location); + + lock(&mem_region_lock); + if (mem_resize(&skiboot_heap, ptr, size, location)) { + newptr = ptr; + } else { + newptr = mem_alloc(&skiboot_heap, size, DEFAULT_ALIGN, + location); + if (newptr) { + size_t copy = mem_size(&skiboot_heap, ptr); + if (copy > size) + copy = size; + memcpy(newptr, ptr, copy); + mem_free(&skiboot_heap, ptr, location); + } + } + unlock(&mem_region_lock); + return newptr; +} + +void *__zalloc(size_t bytes, const char *location) +{ + void *p = __malloc(bytes, location); + + if (p) + memset(p, 0, bytes); + return p; +} diff --git a/core/mem_region.c b/core/mem_region.c new file mode 100644 index 0000000..8904a18 --- /dev/null +++ b/core/mem_region.c @@ -0,0 +1,956 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <skiboot.h> +#include <mem-map.h> +#include <libfdt_env.h> +#include <lock.h> +#include <device.h> +#include <cpu.h> +#include <affinity.h> +#include <types.h> +#include <mem_region.h> +#include <mem_region-malloc.h> + +/* Memory poisoning on free (if POISON_MEM_REGION set to 1) */ +#define POISON_MEM_REGION 0 +#define POISON_MEM_REGION_WITH 0x99 +#define POISON_MEM_REGION_LIMIT 1*1024*1024*1024 + +struct lock mem_region_lock = LOCK_UNLOCKED; + +static struct list_head regions = LIST_HEAD_INIT(regions); + +static struct mem_region skiboot_os_reserve = { + .name = "ibm,os-reserve", + .start = 0, + .len = SKIBOOT_BASE, + .type = REGION_OS, +}; + +struct mem_region skiboot_heap = { + .name = "ibm,firmware-heap", + .start = HEAP_BASE, + .len = HEAP_SIZE, + .type = REGION_SKIBOOT_HEAP, +}; + +static struct mem_region skiboot_code_and_text = { + .name = "ibm,firmware-code", + .start = SKIBOOT_BASE, + .len = HEAP_BASE - SKIBOOT_BASE, + .type = REGION_SKIBOOT_FIRMWARE, +}; + +static struct mem_region skiboot_after_heap = { + .name = "ibm,firmware-data", + .start = HEAP_BASE + HEAP_SIZE, + .len = SKIBOOT_BASE + SKIBOOT_SIZE - (HEAP_BASE + HEAP_SIZE), + .type = REGION_SKIBOOT_FIRMWARE, +}; + +static struct mem_region skiboot_cpu_stacks = { + .name = "ibm,firmware-stacks", + .start = CPU_STACKS_BASE, + .len = 0, /* TBA */ + .type = REGION_SKIBOOT_FIRMWARE, +}; + +struct alloc_hdr { + bool free : 1; + bool prev_free : 1; + unsigned long num_longs : BITS_PER_LONG-2; /* Including header. */ + const char *location; +}; + +struct free_hdr { + struct alloc_hdr hdr; + struct list_node list; + /* ... unsigned long tailer; */ +}; + +#define ALLOC_HDR_LONGS (sizeof(struct alloc_hdr) / sizeof(long)) +#define ALLOC_MIN_LONGS (sizeof(struct free_hdr) / sizeof(long) + 1) + +/* Avoid ugly casts. */ +static void *region_start(const struct mem_region *region) +{ + return (void *)(unsigned long)region->start; +} + +/* Each free block has a tailer, so we can walk backwards. */ +static unsigned long *tailer(struct free_hdr *f) +{ + return (unsigned long *)f + f->hdr.num_longs - 1; +} + +/* This walks forward to the next hdr (or NULL if at the end). */ +static struct alloc_hdr *next_hdr(const struct mem_region *region, + const struct alloc_hdr *hdr) +{ + void *next; + + next = ((unsigned long *)hdr + hdr->num_longs); + if (next >= region_start(region) + region->len) + next = NULL; + return next; +} + +/* Creates free block covering entire region. 
*/ +static void init_allocatable_region(struct mem_region *region) +{ + struct free_hdr *f = region_start(region); + assert(region->type == REGION_SKIBOOT_HEAP); + f->hdr.num_longs = region->len / sizeof(long); + f->hdr.free = true; + f->hdr.prev_free = false; + *tailer(f) = f->hdr.num_longs; + list_head_init(®ion->free_list); + list_add(®ion->free_list, &f->list); +} + +static void make_free(struct mem_region *region, struct free_hdr *f, + const char *location) +{ + struct alloc_hdr *next; +#if POISON_MEM_REGION == 1 + size_t poison_size= (void*)tailer(f) - (void*)(f+1); + + /* We only poison up to a limit, as otherwise boot is kinda slow */ + if (poison_size > POISON_MEM_REGION_LIMIT) { + poison_size = POISON_MEM_REGION_LIMIT; + } + + memset(f+1, POISON_MEM_REGION_WITH, poison_size); +#endif + if (f->hdr.prev_free) { + struct free_hdr *prev; + unsigned long *prev_tailer = (unsigned long *)f - 1; + + assert(*prev_tailer); + prev = (void *)((unsigned long *)f - *prev_tailer); + assert(prev->hdr.free); + assert(!prev->hdr.prev_free); + + /* Expand to cover the one we just freed. */ + prev->hdr.num_longs += f->hdr.num_longs; + f = prev; + } else { + f->hdr.free = true; + f->hdr.location = location; + list_add(®ion->free_list, &f->list); + } + + /* Fix up tailer. */ + *tailer(f) = f->hdr.num_longs; + + /* If next is free, coalesce it */ + next = next_hdr(region, &f->hdr); + if (next) { + next->prev_free = true; + if (next->free) { + struct free_hdr *next_free = (void *)next; + list_del_from(®ion->free_list, &next_free->list); + /* Maximum of one level of recursion */ + make_free(region, next_free, location); + } + } +} + +/* Can we fit this many longs with this alignment in this free block? */ +static bool fits(struct free_hdr *f, size_t longs, size_t align, size_t *offset) +{ + *offset = 0; + + while (f->hdr.num_longs >= *offset + longs) { + size_t addr; + + addr = (unsigned long)f + + (*offset + ALLOC_HDR_LONGS) * sizeof(long); + if ((addr & (align - 1)) == 0) + return true; + + /* Don't make tiny chunks! */ + if (*offset == 0) + *offset = ALLOC_MIN_LONGS; + else + (*offset)++; + } + return false; +} + +static void discard_excess(struct mem_region *region, + struct alloc_hdr *hdr, size_t alloc_longs, + const char *location) +{ + /* Do we have excess? */ + if (hdr->num_longs > alloc_longs + ALLOC_MIN_LONGS) { + struct free_hdr *post; + + /* Set up post block. */ + post = (void *)hdr + alloc_longs * sizeof(long); + post->hdr.num_longs = hdr->num_longs - alloc_longs; + post->hdr.prev_free = false; + + /* Trim our block. */ + hdr->num_longs = alloc_longs; + + /* This coalesces as required. */ + make_free(region, post, location); + } +} + +static const char *hdr_location(const struct alloc_hdr *hdr) +{ + /* Corrupt: step carefully! */ + if (is_rodata(hdr->location)) + return hdr->location; + return "*CORRUPT*"; +} + +static void bad_header(const struct mem_region *region, + const struct alloc_hdr *hdr, + const char *during, + const char *location) +{ + /* Corrupt: step carefully! 
+ */
+	if (is_rodata(hdr->location))
+		prerror("%p (in %s) %s at %s, previously %s\n",
+			hdr-1, region->name, during, location, hdr->location);
+	else
+		prerror("%p (in %s) %s at %s, previously %p\n",
+			hdr-1, region->name, during, location, hdr->location);
+	abort();
+}
+
+static bool region_is_reserved(struct mem_region *region)
+{
+	return region->type != REGION_OS;
+}
+
+static void mem_dump_allocs(void)
+{
+	struct mem_region *region;
+	struct alloc_hdr *hdr;
+
+	/* Dump the allocations in each allocatable region */
+	printf("Memory regions:\n");
+	list_for_each(&regions, region, list) {
+		if (region->type != REGION_SKIBOOT_HEAP)
+			continue;
+		printf("  0x%012llx..%012llx : %s\n",
+		       (long long)region->start,
+		       (long long)(region->start + region->len - 1),
+		       region->name);
+		if (region->free_list.n.next == NULL) {
+			printf("    no allocs\n");
+			continue;
+		}
+		for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+			if (hdr->free)
+				continue;
+			printf("    0x%.8lx %s\n", hdr->num_longs * sizeof(long),
+			       hdr_location(hdr));
+		}
+	}
+}
+
+static void *__mem_alloc(struct mem_region *region, size_t size, size_t align,
+			 const char *location)
+{
+	size_t alloc_longs, offset;
+	struct free_hdr *f;
+	struct alloc_hdr *next;
+
+	/* Align must be power of 2. */
+	assert(!((align - 1) & align));
+
+	/* This should be a constant. */
+	assert(is_rodata(location));
+
+	/* Unallocatable region? */
+	if (region->type != REGION_SKIBOOT_HEAP)
+		return NULL;
+
+	/* First allocation? */
+	if (region->free_list.n.next == NULL)
+		init_allocatable_region(region);
+
+	/* Don't do screwy sizes. */
+	if (size > region->len)
+		return NULL;
+
+	/* Don't do tiny alignments, we deal in long increments. */
+	if (align < sizeof(long))
+		align = sizeof(long);
+
+	/* Convert size to number of longs, too. */
+	alloc_longs = (size + sizeof(long)-1) / sizeof(long) + ALLOC_HDR_LONGS;
+
+	/* Can't be too small for when we free it, either. */
+	if (alloc_longs < ALLOC_MIN_LONGS)
+		alloc_longs = ALLOC_MIN_LONGS;
+
+	/* Walk free list. */
+	list_for_each(&region->free_list, f, list) {
+		/* We may have to skip some to meet alignment. */
+		if (fits(f, alloc_longs, align, &offset))
+			goto found;
+	}
+
+	return NULL;
+
+found:
+	assert(f->hdr.free);
+	assert(!f->hdr.prev_free);
+
+	/* This block is no longer free. */
+	list_del_from(&region->free_list, &f->list);
+	f->hdr.free = false;
+	f->hdr.location = location;
+
+	next = next_hdr(region, &f->hdr);
+	if (next) {
+		assert(next->prev_free);
+		next->prev_free = false;
+	}
+
+	if (offset != 0) {
+		struct free_hdr *pre = f;
+
+		f = (void *)f + offset * sizeof(long);
+		assert(f >= pre + 1);
+
+		/* Set up new header. */
+		f->hdr.num_longs = pre->hdr.num_longs - offset;
+		/* f->hdr.prev_free will be set by make_free below. */
+		f->hdr.free = false;
+		f->hdr.location = location;
+
+		/* Fix up old header. */
+		pre->hdr.num_longs = offset;
+		pre->hdr.prev_free = false;
+
+		/* This coalesces as required. */
+		make_free(region, pre, location);
+	}
+
+	/* We might be too long; put the rest back. */
+	discard_excess(region, &f->hdr, alloc_longs, location);
+
+	/* Clear tailer for debugging */
+	*tailer(f) = 0;
+
+	/* Their pointer is immediately after the header. */
+	return &f->hdr + 1;
+}
+
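A worked instance of the size-to-longs conversion in __mem_alloc() above,
assuming 64-bit longs and the 16-byte header (ALLOC_HDR_LONGS == 2) implied
by struct alloc_hdr:

    #include <stdio.h>
    #include <stddef.h>

    int main(void)
    {
        size_t size = 100;  /* caller asks for 100 bytes */

        /* (100 + 7) / 8 = 13 payload longs, plus 2 header longs */
        size_t alloc_longs = (size + sizeof(long) - 1) / sizeof(long) + 2;

        /* 15 longs = 120 bytes total: 16 header + 104 usable */
        printf("%zu longs\n", alloc_longs);
        return 0;
    }
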
+void *mem_alloc(struct mem_region *region, size_t size, size_t align,
+		const char *location)
+{
+	void *r = __mem_alloc(region, size, align, location);
+
+	if (r)
+		return r;
+
+	prerror("mem_alloc(0x%lx, 0x%lx, \"%s\") failed !\n",
+		size, align, location);
+	mem_dump_allocs();
+	return NULL;
+}
+
+void mem_free(struct mem_region *region, void *mem, const char *location)
+{
+	struct alloc_hdr *hdr;
+
+	/* This should be a constant. */
+	assert(is_rodata(location));
+
+	/* Freeing NULL is always a noop. */
+	if (!mem)
+		return;
+
+	/* Your memory is in the region, right? */
+	assert(mem >= region_start(region) + sizeof(*hdr));
+	assert(mem < region_start(region) + region->len);
+
+	/* Grab header. */
+	hdr = mem - sizeof(*hdr);
+
+	if (hdr->free)
+		bad_header(region, hdr, "re-freed", location);
+
+	make_free(region, (struct free_hdr *)hdr, location);
+}
+
+size_t mem_size(const struct mem_region *region __unused, const void *ptr)
+{
+	const struct alloc_hdr *hdr = ptr - sizeof(*hdr);
+	return hdr->num_longs * sizeof(long);
+}
+
+bool mem_resize(struct mem_region *region, void *mem, size_t len,
+		const char *location)
+{
+	struct alloc_hdr *hdr, *next;
+	struct free_hdr *f;
+
+	/* This should be a constant. */
+	assert(is_rodata(location));
+
+	/* Get header. */
+	hdr = mem - sizeof(*hdr);
+	if (hdr->free)
+		bad_header(region, hdr, "resize", location);
+
+	/* Round up size to multiple of longs. */
+	len = (sizeof(*hdr) + len + sizeof(long) - 1) / sizeof(long);
+
+	/* Can't be too small for when we free it, either. */
+	if (len < ALLOC_MIN_LONGS)
+		len = ALLOC_MIN_LONGS;
+
+	/* Shrinking is simple. */
+	if (len <= hdr->num_longs) {
+		hdr->location = location;
+		discard_excess(region, hdr, len, location);
+		return true;
+	}
+
+	/* Check if we can expand. */
+	next = next_hdr(region, hdr);
+	if (!next || !next->free || hdr->num_longs + next->num_longs < len)
+		return false;
+
+	/* OK, it's free and big enough, absorb it. */
+	f = (struct free_hdr *)next;
+	list_del_from(&region->free_list, &f->list);
+	hdr->num_longs += next->num_longs;
+	hdr->location = location;
+
+	/* Update next prev_free */
+	next = next_hdr(region, &f->hdr);
+	if (next) {
+		assert(next->prev_free);
+		next->prev_free = false;
+	}
+
+	/* Clear tailer for debugging */
+	*tailer(f) = 0;
+
+	/* Now we might have *too* much. */
+	discard_excess(region, hdr, len, location);
+	return true;
+}
+
+bool mem_check(const struct mem_region *region)
+{
+	size_t frees = 0;
+	struct alloc_hdr *hdr, *prev_free = NULL;
+	struct free_hdr *f;
+
+	/* Check it's sanely aligned. */
+	if (region->start % sizeof(struct alloc_hdr)) {
+		prerror("Region '%s' not sanely aligned (%llx)\n",
+			region->name, (unsigned long long)region->start);
+		return false;
+	}
+	if ((long)region->len % sizeof(struct alloc_hdr)) {
+		prerror("Region '%s' not sane length (%llu)\n",
+			region->name, (unsigned long long)region->len);
+		return false;
+	}
+
+	/* Not ours to play with, or empty? Don't do anything. */
+	if (region->type != REGION_SKIBOOT_HEAP ||
+	    region->free_list.n.next == NULL)
+		return true;
+
+	/* Walk linearly. */
+	for (hdr = region_start(region); hdr; hdr = next_hdr(region, hdr)) {
+		if (hdr->num_longs < ALLOC_MIN_LONGS) {
+			prerror("Region '%s' %s %p (%s) size %zu\n",
+				region->name, hdr->free ?
"free" : "alloc", + hdr, hdr_location(hdr), + hdr->num_longs * sizeof(long)); + return false; + } + if ((unsigned long)hdr + hdr->num_longs * sizeof(long) > + region->start + region->len) { + prerror("Region '%s' %s %p (%s) oversize %zu\n", + region->name, hdr->free ? "free" : "alloc", + hdr, hdr_location(hdr), + hdr->num_longs * sizeof(long)); + return false; + } + if (hdr->free) { + if (hdr->prev_free || prev_free) { + prerror("Region '%s' free %p (%s) has prev_free" + " %p (%s) %sset?\n", + region->name, hdr, hdr_location(hdr), + prev_free, + prev_free ? hdr_location(prev_free) + : "NULL", + hdr->prev_free ? "" : "un"); + return false; + } + prev_free = hdr; + frees ^= (unsigned long)hdr - region->start; + } else { + if (hdr->prev_free != (bool)prev_free) { + prerror("Region '%s' alloc %p (%s) has" + " prev_free %p %sset?\n", + region->name, hdr, hdr_location(hdr), + prev_free, hdr->prev_free ? "" : "un"); + return false; + } + prev_free = NULL; + } + } + + /* Now walk free list. */ + list_for_each(®ion->free_list, f, list) + frees ^= (unsigned long)f - region->start; + + if (frees) { + prerror("Region '%s' free list and walk do not match!\n", + region->name); + return false; + } + return true; +} + +static struct mem_region *new_region(const char *name, + uint64_t start, uint64_t len, + struct dt_node *mem_node, + enum mem_region_type type) +{ + struct mem_region *region; + + /* Avoid lock recursion, call mem_alloc directly. */ + region = mem_alloc(&skiboot_heap, + sizeof(*region), __alignof__(*region), __location__); + if (!region) + return NULL; + + region->name = name; + region->start = start; + region->len = len; + region->mem_node = mem_node; + region->type = type; + region->free_list.n.next = NULL; + + return region; +} + +/* We always split regions, so we only have to replace one. */ +static struct mem_region *split_region(struct mem_region *head, + uint64_t split_at, + enum mem_region_type type) +{ + struct mem_region *tail; + uint64_t end = head->start + head->len; + + tail = new_region(head->name, split_at, end - split_at, + head->mem_node, type); + /* Original region becomes head. */ + if (tail) + head->len -= tail->len; + + return tail; +} + +static bool intersects(const struct mem_region *region, uint64_t addr) +{ + return addr > region->start && + addr < region->start + region->len; +} + +static bool maybe_split(struct mem_region *r, uint64_t split_at) +{ + struct mem_region *tail; + + if (!intersects(r, split_at)) + return true; + + tail = split_region(r, split_at, r->type); + if (!tail) + return false; + + /* Tail add is important: we may need to split again! */ + list_add_tail(®ions, &tail->list); + return true; +} + +static bool overlaps(const struct mem_region *r1, const struct mem_region *r2) +{ + return (r1->start + r1->len > r2->start + && r1->start < r2->start + r2->len); +} + +static struct mem_region *get_overlap(const struct mem_region *region) +{ + struct mem_region *i; + + list_for_each(®ions, i, list) { + if (overlaps(region, i)) + return i; + } + return NULL; +} + +static bool add_region(struct mem_region *region) +{ + struct mem_region *r; + + /* First split any regions which intersect. */ + list_for_each(®ions, r, list) + if (!maybe_split(r, region->start) || + !maybe_split(r, region->start + region->len)) + return false; + + /* Now we have only whole overlaps, if any. 
+ */
+	while ((r = get_overlap(region)) != NULL) {
+		assert(r->start == region->start);
+		assert(r->len == region->len);
+		list_del_from(&regions, &r->list);
+		/* We already hold mem_region lock */
+		mem_free(&skiboot_heap, r, __location__);
+	}
+
+	/* Finally, add in our own region. */
+	list_add(&regions, &region->list);
+	return true;
+}
+
+void mem_reserve(const char *name, uint64_t start, uint64_t len)
+{
+	struct mem_region *region;
+	bool added;
+
+	lock(&mem_region_lock);
+	region = new_region(name, start, len, NULL, REGION_RESERVED);
+	assert(region);
+	added = add_region(region);
+	assert(added);
+	unlock(&mem_region_lock);
+}
+
+static bool matches_chip_id(const __be32 ids[], size_t num, u32 chip_id)
+{
+	size_t i;
+
+	for (i = 0; i < num; i++)
+		if (be32_to_cpu(ids[i]) == chip_id)
+			return true;
+
+	return false;
+}
+
+void *__local_alloc(unsigned int chip_id, size_t size, size_t align,
+		    const char *location)
+{
+	struct mem_region *region;
+	void *p = NULL;
+	bool use_local = true;
+
+	lock(&mem_region_lock);
+
+restart:
+	list_for_each(&regions, region, list) {
+		const struct dt_property *prop;
+		const __be32 *ids;
+
+		if (region->type != REGION_SKIBOOT_HEAP)
+			continue;
+
+		/* Don't allocate from normal heap. */
+		if (region == &skiboot_heap)
+			continue;
+
+		/* First pass, only match node local regions */
+		if (use_local) {
+			if (!region->mem_node)
+				continue;
+			prop = dt_find_property(region->mem_node, "ibm,chip-id");
+			ids = (const __be32 *)prop->prop;
+			if (!matches_chip_id(ids, prop->len/sizeof(u32),
+					     chip_id))
+				continue;
+		}
+
+		/* Second pass, match anything */
+		p = mem_alloc(region, size, align, location);
+		if (p)
+			break;
+	}
+
+	/*
+	 * If we can't allocate the memory block from the expected
+	 * node, we bail to any one that can accommodate our request.
+	 */
+	if (!p && use_local) {
+		use_local = false;
+		goto restart;
+	}
+
+	unlock(&mem_region_lock);
+
+	return p;
+}
+
+struct mem_region *find_mem_region(const char *name)
+{
+	struct mem_region *region;
+
+	list_for_each(&regions, region, list) {
+		if (streq(region->name, name))
+			return region;
+	}
+	return NULL;
+}
+
+/* Trawl through device tree, create memory regions from nodes. */
+void mem_region_init(void)
+{
+	const struct dt_property *names, *ranges;
+	struct mem_region *region;
+	struct dt_node *i;
+
+	/* Ensure we have no collision between skiboot core and our heap */
+	extern char _end[];
+	BUILD_ASSERT(HEAP_BASE >= (uint64_t)_end);
+
+	/*
+	 * Add associativity properties outside of the lock
+	 * to avoid recursive locking caused by allocations
+	 * done by add_chip_dev_associativity()
+	 */
+	dt_for_each_node(dt_root, i) {
+		if (!dt_has_node_property(i, "device_type", "memory"))
+			continue;
+
+		/* Add associativity properties */
+		add_chip_dev_associativity(i);
+	}
+
+	/* Add each memory node. */
+	dt_for_each_node(dt_root, i) {
+		uint64_t start, len;
+		char *rname;
+#define NODE_REGION_PREFIX "ibm,firmware-allocs-"
+
+		if (!dt_has_node_property(i, "device_type", "memory"))
+			continue;
+		rname = zalloc(strlen(i->name) + strlen(NODE_REGION_PREFIX) + 1);
+		strcat(rname, NODE_REGION_PREFIX);
+		strcat(rname, i->name);
+		start = dt_get_address(i, 0, &len);
+		lock(&mem_region_lock);
+		region = new_region(rname, start, len, i, REGION_SKIBOOT_HEAP);
+		if (!region) {
+			prerror("MEM: Could not add mem region %s!\n", i->name);
+			abort();
+		}
+		list_add(&regions, &region->list);
+		unlock(&mem_region_lock);
+	}
+
+	/* Now we know how many CPU stacks we have, fix that up.
*/ + skiboot_cpu_stacks.len = (cpu_max_pir + 1) * STACK_SIZE; + + lock(&mem_region_lock); + + /* Now carve out our own reserved areas. */ + if (!add_region(&skiboot_os_reserve) || + !add_region(&skiboot_code_and_text) || + !add_region(&skiboot_heap) || + !add_region(&skiboot_after_heap) || + !add_region(&skiboot_cpu_stacks)) { + prerror("Out of memory adding skiboot reserved areas\n"); + abort(); + } + + /* Add reserved ranges from the DT */ + names = dt_find_property(dt_root, "reserved-names"); + ranges = dt_find_property(dt_root, "reserved-ranges"); + if (names && ranges) { + const uint64_t *range; + int n, len; + + range = (const void *)ranges->prop; + + for (n = 0; n < names->len; n += len, range += 2) { + char *name; + + len = strlen(names->prop + n) + 1; + + name = mem_alloc(&skiboot_heap, len, + __alignof__(*name), __location__); + memcpy(name, names->prop + n, len); + + region = new_region(name, + dt_get_number(range, 2), + dt_get_number(range + 1, 2), + NULL, REGION_RESERVED); + list_add(®ions, ®ion->list); + } + } else if (names || ranges) { + prerror("Invalid properties: reserved-names=%p " + "with reserved-ranges=%p\n", + names, ranges); + abort(); + } + + unlock(&mem_region_lock); + + /* We generate the reservation properties from our own region list, + * which now includes the existing data. + */ + if (names) + dt_del_property(dt_root, (struct dt_property *)names); + if (ranges) + dt_del_property(dt_root, (struct dt_property *)ranges); +} + +static uint64_t allocated_length(const struct mem_region *r) +{ + struct free_hdr *f, *last = NULL; + + /* No allocations at all? */ + if (r->free_list.n.next == NULL) + return 0; + + /* Find last free block. */ + list_for_each(&r->free_list, f, list) + if (f > last) + last = f; + + /* No free blocks? */ + if (!last) + return r->len; + + /* Last free block isn't at end? */ + if (next_hdr(r, &last->hdr)) + return r->len; + return (unsigned long)last - r->start; +} + +/* Separate out allocated sections into their own region. */ +void mem_region_release_unused(void) +{ + struct mem_region *r; + + lock(&mem_region_lock); + + printf("Releasing unused memory:\n"); + list_for_each(®ions, r, list) { + uint64_t used_len; + + /* If it's not allocatable, ignore it. */ + if (r->type != REGION_SKIBOOT_HEAP) + continue; + + used_len = allocated_length(r); + + printf(" %s: %llu/%llu used\n", + r->name, (long long)used_len, (long long)r->len); + + /* We keep the skiboot heap. */ + if (r == &skiboot_heap) + continue; + + /* Nothing used? Whole thing is for Linux. */ + if (used_len == 0) + r->type = REGION_OS; + /* Partially used? Split region. */ + else if (used_len != r->len) { + struct mem_region *for_linux; + struct free_hdr *last = region_start(r) + used_len; + + /* Remove the final free block. 
*/ + list_del_from(&r->free_list, &last->list); + + for_linux = split_region(r, r->start + used_len, + REGION_OS); + if (!for_linux) { + prerror("OOM splitting mem node %s for linux\n", + r->name); + abort(); + } + list_add(®ions, &for_linux->list); + } + } + unlock(&mem_region_lock); +} + +void mem_region_add_dt_reserved(void) +{ + int names_len, ranges_len, len; + struct mem_region *region; + void *names, *ranges; + uint64_t *range; + char *name; + + names_len = 0; + ranges_len = 0; + + lock(&mem_region_lock); + + /* First pass: calculate length of property data */ + list_for_each(®ions, region, list) { + if (!region_is_reserved(region)) + continue; + names_len += strlen(region->name) + 1; + ranges_len += 2 * sizeof(uint64_t); + } + + /* Allocate property data with mem_alloc; malloc() acquires + * mem_region_lock */ + names = mem_alloc(&skiboot_heap, names_len, + __alignof__(*names), __location__); + ranges = mem_alloc(&skiboot_heap, ranges_len, + __alignof__(*ranges), __location__); + + name = names; + range = ranges; + + printf("Reserved regions:\n"); + /* Second pass: populate property data */ + list_for_each(®ions, region, list) { + if (!region_is_reserved(region)) + continue; + len = strlen(region->name) + 1; + memcpy(name, region->name, len); + name += len; + + printf(" 0x%012llx..%012llx : %s\n", + (long long)region->start, + (long long)(region->start + region->len - 1), + region->name); + + range[0] = cpu_to_fdt64(region->start); + range[1] = cpu_to_fdt64(region->len); + range += 2; + } + unlock(&mem_region_lock); + + dt_add_property(dt_root, "reserved-names", names, names_len); + dt_add_property(dt_root, "reserved-ranges", ranges, ranges_len); + + free(names); + free(ranges); +} diff --git a/core/nvram.c b/core/nvram.c new file mode 100644 index 0000000..f25d6aa --- /dev/null +++ b/core/nvram.c @@ -0,0 +1,248 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <skiboot.h> +#include <fsp.h> +#include <opal.h> +#include <lock.h> +#include <device.h> +#include <platform.h> + +static void *nvram_image; +static uint32_t nvram_size; +static bool nvram_ready; + +static int64_t opal_read_nvram(uint64_t buffer, uint64_t size, uint64_t offset) +{ + if (!nvram_ready) + return OPAL_HARDWARE; + if (offset >= nvram_size || (offset + size) > nvram_size) + return OPAL_PARAMETER; + + memcpy((void *)buffer, nvram_image + offset, size); + return OPAL_SUCCESS; +} +opal_call(OPAL_READ_NVRAM, opal_read_nvram, 3); + +static int64_t opal_write_nvram(uint64_t buffer, uint64_t size, uint64_t offset) +{ + if (!nvram_ready) + return OPAL_HARDWARE; + if (offset >= nvram_size || (offset + size) > nvram_size) + return OPAL_PARAMETER; + memcpy(nvram_image + offset, (void *)buffer, size); + if (platform.nvram_write) + platform.nvram_write(offset, nvram_image + offset, size); + return OPAL_SUCCESS; +} +opal_call(OPAL_WRITE_NVRAM, opal_write_nvram, 3); + +struct chrp_nvram_hdr { + uint8_t sig; + uint8_t cksum; + uint16_t len; + char name[12]; +}; + +#define NVRAM_SIG_FW_PRIV 0x51 +#define NVRAM_SIG_SYSTEM 0x70 +#define NVRAM_SIG_FREE 0x7f + +#define NVRAM_NAME_COMMON "common" +#define NVRAM_NAME_FW_PRIV "ibm,skiboot" +#define NVRAM_NAME_FREE "wwwwwwwwwwww" + +/* 64k should be enough, famous last words... */ +#define NVRAM_SIZE_COMMON 0x10000 + +/* 4k should be enough, famous last words... */ +#define NVRAM_SIZE_FW_PRIV 0x1000 + +static uint8_t chrp_nv_cksum(struct chrp_nvram_hdr *hdr) +{ + struct chrp_nvram_hdr h_copy = *hdr; + uint8_t b_data, i_sum, c_sum; + uint8_t *p = (uint8_t *)&h_copy; + unsigned int nbytes = sizeof(h_copy); + + h_copy.cksum = 0; + for (c_sum = 0; nbytes; nbytes--) { + b_data = *(p++); + i_sum = c_sum + b_data; + if (i_sum < c_sum) + i_sum++; + c_sum = i_sum; + } + return c_sum; +} + +static void nvram_format(void) +{ + struct chrp_nvram_hdr *h; + unsigned int offset = 0; + + prerror("NVRAM: Re-initializing\n"); + memset(nvram_image, 0, nvram_size); + + /* Create private partition */ + h = nvram_image + offset; + h->sig = NVRAM_SIG_FW_PRIV; + h->len = NVRAM_SIZE_FW_PRIV >> 4; + strcpy(h->name, NVRAM_NAME_FW_PRIV); + h->cksum = chrp_nv_cksum(h); + offset += NVRAM_SIZE_FW_PRIV; + + /* Create common partition */ + h = nvram_image + offset; + h->sig = NVRAM_SIG_SYSTEM; + h->len = NVRAM_SIZE_COMMON >> 4; + strcpy(h->name, NVRAM_NAME_COMMON); + h->cksum = chrp_nv_cksum(h); + offset += NVRAM_SIZE_COMMON; + + /* Create free space partition */ + h = nvram_image + offset; + h->sig = NVRAM_SIG_FREE; + h->len = (nvram_size - offset) >> 4; + strncpy(h->name, NVRAM_NAME_FREE, 12); + h->cksum = chrp_nv_cksum(h); + + /* Write the whole thing back */ + if (platform.nvram_write) + platform.nvram_write(0, nvram_image, nvram_size); +} + +/* + * Check that the nvram partition layout is sane and that it + * contains our required partitions. 
If not, we re-format the
+ * lot of it
+ */
+static void nvram_check(void)
+{
+	unsigned int offset = 0;
+	bool found_common = false;
+	bool found_skiboot = false;
+
+	while (offset + sizeof(struct chrp_nvram_hdr) < nvram_size) {
+		struct chrp_nvram_hdr *h = nvram_image + offset;
+
+		if (chrp_nv_cksum(h) != h->cksum) {
+			prerror("NVRAM: Partition at offset 0x%x"
+				" has bad checksum\n", offset);
+			goto failed;
+		}
+		if (h->len < 1) {
+			prerror("NVRAM: Partition at offset 0x%x"
+				" has incorrect 0 length\n", offset);
+			goto failed;
+		}
+
+		if (h->sig == NVRAM_SIG_SYSTEM &&
+		    strcmp(h->name, NVRAM_NAME_COMMON) == 0)
+			found_common = true;
+
+		if (h->sig == NVRAM_SIG_FW_PRIV &&
+		    strcmp(h->name, NVRAM_NAME_FW_PRIV) == 0)
+			found_skiboot = true;
+
+		offset += h->len << 4;
+		if (offset > nvram_size) {
+			prerror("NVRAM: Partition at offset 0x%x"
+				" extends beyond end of nvram !\n", offset);
+			goto failed;
+		}
+	}
+	if (!found_common) {
+		prerror("NVRAM: Common partition not found !\n");
+		goto failed;
+	}
+	if (!found_skiboot) {
+		prerror("NVRAM: Skiboot private partition "
+			"not found !\n");
+		goto failed;
+	}
+
+	prerror("NVRAM: Layout appears sane\n");
+	return;
+ failed:
+	nvram_format();
+}
+
+void nvram_read_complete(bool success)
+{
+	struct dt_node *np;
+
+	/* Read not successful, error out and free the buffer */
+	if (!success) {
+		free(nvram_image);
+		nvram_size = 0;
+		return;
+	}
+
+	/* Check and maybe format nvram */
+	nvram_check();
+
+	/* Add nvram node */
+	np = dt_new(opal_node, "nvram");
+	dt_add_property_cells(np, "#bytes", nvram_size);
+	dt_add_property_string(np, "compatible", "ibm,opal-nvram");
+
+	/* Mark ready */
+	nvram_ready = true;
+}
+
+void nvram_init(void)
+{
+	int rc;
+
+	if (!platform.nvram_info)
+		return;
+	rc = platform.nvram_info(&nvram_size);
+	if (rc) {
+		prerror("NVRAM: Error %d retrieving nvram info\n", rc);
+		return;
+	}
+	printf("NVRAM: Size is %d KB\n", nvram_size >> 10);
+	if (nvram_size > 0x100000) {
+		printf("NVRAM: Cropping to 1MB !\n");
+		nvram_size = 0x100000;
+	}
+
+	/*
+	 * We allocate the nvram image with 4k alignment to make the
+	 * FSP backend's job easier
+	 */
+	nvram_image = memalign(0x1000, nvram_size);
+	if (!nvram_image) {
+		prerror("NVRAM: Failed to allocate nvram image\n");
+		nvram_size = 0;
+		return;
+	}
+
+	/* Read it in */
+	rc = platform.nvram_start_read(nvram_image, 0, nvram_size);
+	if (rc) {
+		prerror("NVRAM: Failed to read NVRAM from FSP !\n");
+		nvram_size = 0;
+		free(nvram_image);
+		return;
+	}
+
+	/*
+	 * We'll get called back later (or recursively from
+	 * nvram_start_read) in nvram_read_complete()
+	 */
+}
diff --git a/core/opal-msg.c b/core/opal-msg.c
new file mode 100644
index 0000000..f033b76
--- /dev/null
+++ b/core/opal-msg.c
@@ -0,0 +1,167 @@
+/* Copyright 2013-2014 IBM Corp.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
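A hedged usage sketch for the queuing API defined below: a firmware producer
posts an async-completion message carrying a token, which Linux later drains
through opal_get_msg() / opal_check_completion(). The token and return-code
values here are hypothetical:

    /* Sketch only: queue an OPAL_MSG_ASYNC_COMP with two parameters. */
    static void post_completion(uint64_t token, int64_t rc)
    {
        uint64_t params[2] = { token, (uint64_t)rc };

        _opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL,
                        ARRAY_SIZE(params), params);
    }
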
+
+
+#include <skiboot.h>
+#include <opal-msg.h>
+#include <lock.h>
+
+#define OPAL_MAX_MSGS		(OPAL_MSG_TYPE_MAX + OPAL_MAX_ASYNC_COMP - 1)
+#define OPAL_MSG_PREFIX		"opalmsg: "
+
+
+struct opal_msg_entry {
+	struct list_node link;
+	void (*consumed)(void *data);
+	void *data;
+	struct opal_msg msg;
+};
+
+static LIST_HEAD(msg_free_list);
+static LIST_HEAD(msg_pending_list);
+
+static struct lock opal_msg_lock = LOCK_UNLOCKED;
+
+int _opal_queue_msg(enum OpalMessageType msg_type, void *data,
+		    void (*consumed)(void *data), size_t num_params,
+		    const u64 *params)
+{
+	struct opal_msg_entry *entry;
+
+	lock(&opal_msg_lock);
+
+	entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+	if (!entry) {
+		prerror(OPAL_MSG_PREFIX "No available node in the free list, allocating\n");
+		entry = zalloc(sizeof(struct opal_msg_entry));
+		if (!entry) {
+			prerror(OPAL_MSG_PREFIX "Allocation failed\n");
+			unlock(&opal_msg_lock);
+			return OPAL_RESOURCE;
+		}
+	}
+
+	entry->consumed = consumed;
+	entry->data = data;
+	entry->msg.msg_type = msg_type;
+
+	if (num_params > ARRAY_SIZE(entry->msg.params)) {
+		prerror(OPAL_MSG_PREFIX "Discarding extra parameters\n");
+		num_params = ARRAY_SIZE(entry->msg.params);
+	}
+	memcpy(entry->msg.params, params, num_params*sizeof(u64));
+
+	list_add_tail(&msg_pending_list, &entry->link);
+	opal_update_pending_evt(OPAL_EVENT_MSG_PENDING,
+				OPAL_EVENT_MSG_PENDING);
+
+	unlock(&opal_msg_lock);
+
+	return 0;
+}
+
+static int64_t opal_get_msg(uint64_t *buffer, uint64_t size)
+{
+	struct opal_msg_entry *entry;
+	void (*callback)(void *data);
+	void *data;
+
+	if (size < sizeof(struct opal_msg) || !buffer)
+		return OPAL_PARAMETER;
+
+	lock(&opal_msg_lock);
+
+	entry = list_pop(&msg_pending_list, struct opal_msg_entry, link);
+	if (!entry) {
+		unlock(&opal_msg_lock);
+		return OPAL_RESOURCE;
+	}
+
+	memcpy(buffer, &entry->msg, sizeof(entry->msg));
+	callback = entry->consumed;
+	data = entry->data;
+
+	list_add(&msg_free_list, &entry->link);
+	if (list_empty(&msg_pending_list))
+		opal_update_pending_evt(OPAL_EVENT_MSG_PENDING, 0);
+
+	unlock(&opal_msg_lock);
+
+	if (callback)
+		callback(data);
+
+	return OPAL_SUCCESS;
+}
+opal_call(OPAL_GET_MSG, opal_get_msg, 2);
+
+static int64_t opal_check_completion(uint64_t *buffer, uint64_t size,
+				     uint64_t token)
+{
+	struct opal_msg_entry *entry, *next_entry;
+	void (*callback)(void *data) = NULL;
+	int rc = OPAL_BUSY;
+	void *data = NULL;
+
+	lock(&opal_msg_lock);
+	list_for_each_safe(&msg_pending_list, entry, next_entry, link) {
+		if (entry->msg.msg_type == OPAL_MSG_ASYNC_COMP &&
+		    entry->msg.params[0] == token) {
+			list_del(&entry->link);
+			callback = entry->consumed;
+			data = entry->data;
+			list_add(&msg_free_list, &entry->link);
+			if (list_empty(&msg_pending_list))
+				opal_update_pending_evt(OPAL_EVENT_MSG_PENDING,
+							0);
+			rc = OPAL_SUCCESS;
+			break;
+		}
+	}
+
+	if (rc == OPAL_SUCCESS && size >= sizeof(struct opal_msg))
+		memcpy(buffer, &entry->msg, sizeof(entry->msg));
+
+	unlock(&opal_msg_lock);
+
+	if (callback)
+		callback(data);
+
+	return rc;
+
+}
+opal_call(OPAL_CHECK_ASYNC_COMPLETION, opal_check_completion, 3);
+
+void opal_init_msg(void)
+{
+	struct opal_msg_entry *entry;
+	int i;
+
+	for (i = 0; i < OPAL_MAX_MSGS; i++) {
+		entry = zalloc(sizeof(*entry));
+		if (!entry)
+			goto err;
+		list_add_tail(&msg_free_list, &entry->link);
+	}
+	return;
+
+err:
+	for (; i > 0; i--) {
+		entry = list_pop(&msg_free_list, struct opal_msg_entry, link);
+		if (entry)
+			free(entry);
+	}
+}
+
diff --git a/core/opal.c b/core/opal.c
new
file mode 100644 index 0000000..2727fd5 --- /dev/null +++ b/core/opal.c @@ -0,0 +1,308 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <opal.h> +#include <stack.h> +#include <lock.h> +#include <fsp.h> +#include <cpu.h> +#include <interrupts.h> +#include <op-panel.h> +#include <device.h> +#include <console.h> +#include <trace.h> +#include <timebase.h> +#include <affinity.h> +#include <opal-msg.h> + +/* Pending events to signal via opal_poll_events */ +uint64_t opal_pending_events; + +/* OPAL dispatch table defined in head.S */ +extern uint64_t opal_branch_table[]; + +/* Number of args expected for each call. */ +static u8 opal_num_args[OPAL_LAST+1]; + +/* OPAL anchor node */ +struct dt_node *opal_node; + +extern uint32_t attn_trigger; +extern uint32_t hir_trigger; + +void opal_table_init(void) +{ + struct opal_table_entry *s = __opal_table_start; + struct opal_table_entry *e = __opal_table_end; + + printf("OPAL table: %p .. %p, branch table: %p\n", + s, e, opal_branch_table); + while(s < e) { + uint64_t *func = s->func; + opal_branch_table[s->token] = *func; + opal_num_args[s->token] = s->nargs; + s++; + } +} + +/* Called from head.S, thus no prototype */ +long opal_bad_token(uint64_t token); + +long opal_bad_token(uint64_t token) +{ + prerror("OPAL: Called with bad token %lld !\n", token); + + return OPAL_PARAMETER; +} + +/* Called from head.S, thus no prototype */ +void opal_trace_entry(struct stack_frame *eframe); + +/* FIXME: Do this in asm */ +void opal_trace_entry(struct stack_frame *eframe) +{ + union trace t; + unsigned nargs; + + if (this_cpu()->pir != mfspr(SPR_PIR)) { + printf("CPU MISMATCH ! PIR=%04lx cpu @%p -> pir=%04x\n", + mfspr(SPR_PIR), this_cpu(), this_cpu()->pir); + abort(); + } + if (eframe->gpr[0] > OPAL_LAST) + nargs = 0; + else + nargs = opal_num_args[eframe->gpr[0]]; + + t.opal.token = eframe->gpr[0]; + t.opal.lr = eframe->lr; + t.opal.sp = eframe->gpr[1]; + memcpy(t.opal.r3_to_11, &eframe->gpr[3], nargs*sizeof(u64)); + + trace_add(&t, TRACE_OPAL, offsetof(struct trace_opal, r3_to_11[nargs])); +} + +void __opal_register(uint64_t token, void *func, unsigned int nargs) +{ + uint64_t *opd = func; + + assert(token <= OPAL_LAST); + + opal_branch_table[token] = *opd; + opal_num_args[token] = nargs; +} + +static void add_opal_firmware_node(void) +{ + struct dt_node *firmware = dt_new(opal_node, "firmware"); + + dt_add_property_string(firmware, "compatible", "ibm,opal-firmware"); + dt_add_property_string(firmware, "name", "firmware"); + dt_add_property_string(firmware, "git-id", gitid); +} + +void add_opal_node(void) +{ + uint64_t base, entry, size; + extern uint32_t opal_entry; + + /* XXX TODO: Reorg this. We should create the base OPAL + * node early on, and have the various sub modules populate + * their own entries (console etc...) 
+ * + * The logic of which console backend to use should be + * extracted + */ + + entry = (uint64_t)&opal_entry; + base = SKIBOOT_BASE; + size = (CPU_STACKS_BASE + + (cpu_max_pir + 1) * STACK_SIZE) - SKIBOOT_BASE; + + opal_node = dt_new(dt_root, "ibm,opal"); + dt_add_property_cells(opal_node, "#address-cells", 0); + dt_add_property_cells(opal_node, "#size-cells", 0); + dt_add_property_strings(opal_node, "compatible", "ibm,opal-v2", + "ibm,opal-v3"); + dt_add_property_cells(opal_node, "opal-msg-async-num", OPAL_MAX_ASYNC_COMP); + dt_add_property_cells(opal_node, "opal-msg-size", sizeof(struct opal_msg)); + dt_add_property_u64(opal_node, "opal-base-address", base); + dt_add_property_u64(opal_node, "opal-entry-address", entry); + dt_add_property_u64(opal_node, "opal-runtime-size", size); + + add_opal_firmware_node(); + add_associativity_ref_point(); + memcons_add_properties(); + add_cpu_idle_state_properties(); +} + +void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values) +{ + static struct lock evt_lock = LOCK_UNLOCKED; + uint64_t new_evts; + + /* XXX FIXME: Use atomics instead ??? Or caller locks (con_lock ?) */ + lock(&evt_lock); + new_evts = (opal_pending_events & ~evt_mask) | evt_values; +#ifdef OPAL_TRACE_EVT_CHG + printf("OPAL: Evt change: 0x%016llx -> 0x%016llx\n", + opal_pending_events, new_evts); +#endif + opal_pending_events = new_evts; + unlock(&evt_lock); +} + + +static uint64_t opal_test_func(uint64_t arg) +{ + printf("OPAL: Test function called with arg 0x%llx\n", arg); + + return 0xfeedf00d; +} +opal_call(OPAL_TEST, opal_test_func, 1); + +struct opal_poll_entry { + struct list_node link; + void (*poller)(void *data); + void *data; +}; + +static struct list_head opal_pollers = LIST_HEAD_INIT(opal_pollers); +static struct lock opal_poll_lock = LOCK_UNLOCKED; + +void opal_add_poller(void (*poller)(void *data), void *data) +{ + struct opal_poll_entry *ent; + + ent = zalloc(sizeof(struct opal_poll_entry)); + assert(ent); + ent->poller = poller; + ent->data = data; + lock(&opal_poll_lock); + list_add_tail(&opal_pollers, &ent->link); + unlock(&opal_poll_lock); +} + +void opal_del_poller(void (*poller)(void *data)) +{ + struct opal_poll_entry *ent; + + lock(&opal_poll_lock); + list_for_each(&opal_pollers, ent, link) { + if (ent->poller == poller) { + list_del(&ent->link); + free(ent); + break; + } + } + unlock(&opal_poll_lock); +} + +static int64_t opal_poll_events(uint64_t *outstanding_event_mask) +{ + struct opal_poll_entry *poll_ent; + + /* Check if we need to trigger an attn for test use */ + if (attn_trigger == 0xdeadbeef) { + printf("Triggering attn\n"); + assert(false); + } + + /* Test the host initiated reset */ + if (hir_trigger == 0xdeadbeef) { + fsp_trigger_reset(); + hir_trigger = 0; + } + + /* + * Only run the pollers if they aren't already running + * on another CPU + */ + if (try_lock(&opal_poll_lock)) { + list_for_each(&opal_pollers, poll_ent, link) + poll_ent->poller(poll_ent->data); + unlock(&opal_poll_lock); + } + + if (outstanding_event_mask) + *outstanding_event_mask = opal_pending_events; + + return OPAL_SUCCESS; +} +opal_call(OPAL_POLL_EVENTS, opal_poll_events, 1); + +static int64_t opal_check_token(uint64_t token) +{ + if (token > OPAL_LAST) + return OPAL_TOKEN_ABSENT; + + if (opal_branch_table[token]) + return OPAL_TOKEN_PRESENT; + + return OPAL_TOKEN_ABSENT; +} +opal_call(OPAL_CHECK_TOKEN, opal_check_token, 1); + +struct opal_sync_entry { + struct list_node link; + bool (*notify)(void *data); + void *data; +}; + +static struct list_head 
opal_syncers = LIST_HEAD_INIT(opal_syncers); + +void opal_add_host_sync_notifier(bool (*notify)(void *data), void *data) +{ + struct opal_sync_entry *ent; + + ent = zalloc(sizeof(struct opal_sync_entry)); + assert(ent); + ent->notify = notify; + ent->data = data; + list_add_tail(&opal_syncers, &ent->link); +} + +void opal_del_host_sync_notifier(bool (*notify)(void *data)) +{ + struct opal_sync_entry *ent; + + list_for_each(&opal_syncers, ent, link) { + if (ent->notify == notify) { + list_del(&ent->link); + free(ent); + return; + } + } +} + +/* + * OPAL call to handle host kexec'ing scenario + */ +static int64_t opal_sync_host_reboot(void) +{ + struct opal_sync_entry *ent; + bool ret = true; + + list_for_each(&opal_syncers, ent, link) + ret &= ent->notify(ent->data); + + if (ret) + return OPAL_SUCCESS; + else + return OPAL_BUSY_EVENT; +} +opal_call(OPAL_SYNC_HOST_REBOOT, opal_sync_host_reboot, 0); diff --git a/core/pci-opal.c b/core/pci-opal.c new file mode 100644 index 0000000..ee534cc --- /dev/null +++ b/core/pci-opal.c @@ -0,0 +1,666 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <pci.h> +#include <pci-cfg.h> +#include <timebase.h> +#include <lock.h> + +#define OPAL_PCICFG_ACCESS(op, cb, type) \ +static int64_t opal_pci_config_##op(uint64_t phb_id, \ + uint64_t bus_dev_func, \ + uint64_t offset, type data) \ +{ \ + struct phb *phb = pci_get_phb(phb_id); \ + int64_t rc; \ + \ + if (!phb) \ + return OPAL_PARAMETER; \ + phb->ops->lock(phb); \ + rc = phb->ops->cfg_##cb(phb, bus_dev_func, offset, data); \ + phb->ops->unlock(phb); \ + pci_put_phb(phb); \ + \ + return rc; \ +} + +OPAL_PCICFG_ACCESS(read_byte, read8, uint8_t *) +OPAL_PCICFG_ACCESS(read_half_word, read16, uint16_t *) +OPAL_PCICFG_ACCESS(read_word, read32, uint32_t *) +OPAL_PCICFG_ACCESS(write_byte, write8, uint8_t) +OPAL_PCICFG_ACCESS(write_half_word, write16, uint16_t) +OPAL_PCICFG_ACCESS(write_word, write32, uint32_t) + +opal_call(OPAL_PCI_CONFIG_READ_BYTE, opal_pci_config_read_byte, 4); +opal_call(OPAL_PCI_CONFIG_READ_HALF_WORD, opal_pci_config_read_half_word, 4); +opal_call(OPAL_PCI_CONFIG_READ_WORD, opal_pci_config_read_word, 4); +opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, opal_pci_config_write_byte, 4); +opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, opal_pci_config_write_half_word, 4); +opal_call(OPAL_PCI_CONFIG_WRITE_WORD, opal_pci_config_write_word, 4); + +static int64_t opal_pci_eeh_freeze_status(uint64_t phb_id, uint64_t pe_number, + uint8_t *freeze_state, + uint16_t *pci_error_type, + uint64_t *phb_status) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->eeh_freeze_status) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->eeh_freeze_status(phb, pe_number, freeze_state, + pci_error_type, NULL, phb_status); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_EEH_FREEZE_STATUS, opal_pci_eeh_freeze_status, 5); + +static int64_t 
opal_pci_eeh_freeze_clear(uint64_t phb_id, uint64_t pe_number, + uint64_t eeh_action_token) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->eeh_freeze_clear) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->eeh_freeze_clear(phb, pe_number, eeh_action_token); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, opal_pci_eeh_freeze_clear, 3); + +static int64_t opal_pci_phb_mmio_enable(uint64_t phb_id, uint16_t window_type, + uint16_t window_num, uint16_t enable) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->phb_mmio_enable) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->phb_mmio_enable(phb, window_type, window_num, enable); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_PHB_MMIO_ENABLE, opal_pci_phb_mmio_enable, 4); + +static int64_t opal_pci_set_phb_mem_window(uint64_t phb_id, + uint16_t window_type, + uint16_t window_num, + uint64_t addr, + uint64_t pci_addr, + uint64_t size) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_phb_mem_window) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->set_phb_mem_window(phb, window_type, window_num, + addr, pci_addr, size); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_PHB_MEM_WINDOW, opal_pci_set_phb_mem_window, 6); + +static int64_t opal_pci_map_pe_mmio_window(uint64_t phb_id, uint16_t pe_number, + uint16_t window_type, + uint16_t window_num, + uint16_t segment_num) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->map_pe_mmio_window) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->map_pe_mmio_window(phb, pe_number, window_type, + window_num, segment_num); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, opal_pci_map_pe_mmio_window, 5); + +static int64_t opal_pci_set_phb_table_memory(uint64_t phb_id __unused, + uint64_t rtt_addr __unused, + uint64_t ivt_addr __unused, + uint64_t ivt_len __unused, + uint64_t rej_array_addr __unused, + uint64_t peltv_addr __unused) +{ + /* IODA2 (P8) stuff, TODO */ + return OPAL_UNSUPPORTED; +} +opal_call(OPAL_PCI_SET_PHB_TABLE_MEMORY, opal_pci_set_phb_table_memory, 6); + +static int64_t opal_pci_set_pe(uint64_t phb_id, uint64_t pe_number, + uint64_t bus_dev_func, uint8_t bus_compare, + uint8_t dev_compare, uint8_t func_compare, + uint8_t pe_action) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_pe) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->set_pe(phb, pe_number, bus_dev_func, bus_compare, + dev_compare, func_compare, pe_action); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_PE, opal_pci_set_pe, 7); + +static int64_t opal_pci_set_peltv(uint64_t phb_id, uint32_t parent_pe, + uint32_t child_pe, uint8_t state) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_peltv) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->set_peltv(phb, parent_pe, child_pe, state); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_PELTV, opal_pci_set_peltv, 4); + +static int64_t 
opal_pci_set_mve(uint64_t phb_id, uint32_t mve_number, + uint32_t pe_number) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_mve) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->set_mve(phb, mve_number, pe_number); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_MVE, opal_pci_set_mve, 3); + +static int64_t opal_pci_set_mve_enable(uint64_t phb_id, uint32_t mve_number, + uint32_t state) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_mve_enable) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->set_mve_enable(phb, mve_number, state); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_MVE_ENABLE, opal_pci_set_mve_enable, 3); + +static int64_t opal_pci_get_xive_reissue(uint64_t phb_id __unused, + uint32_t xive_number __unused, + uint8_t *p_bit __unused, + uint8_t *q_bit __unused) +{ + /* IODA2 (P8) stuff, TODO */ + return OPAL_UNSUPPORTED; +} +opal_call(OPAL_PCI_GET_XIVE_REISSUE, opal_pci_get_xive_reissue, 4); + +static int64_t opal_pci_set_xive_reissue(uint64_t phb_id __unused, + uint32_t xive_number __unused, + uint8_t p_bit __unused, + uint8_t q_bit __unused) +{ + /* IODA2 (P8) stuff, TODO */ + return OPAL_UNSUPPORTED; +} +opal_call(OPAL_PCI_SET_XIVE_REISSUE, opal_pci_set_xive_reissue, 4); + +static int64_t opal_pci_msi_eoi(uint64_t phb_id, + uint32_t hwirq) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->pci_msi_eoi) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->pci_msi_eoi(phb, hwirq); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_MSI_EOI, opal_pci_msi_eoi, 2); + +static int64_t opal_pci_set_xive_pe(uint64_t phb_id, uint32_t pe_number, + uint32_t xive_num) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_xive_pe) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->set_xive_pe(phb, pe_number, xive_num); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_XIVE_PE, opal_pci_set_xive_pe, 3); + +static int64_t opal_get_xive_source(uint64_t phb_id, uint32_t xive_num, + int32_t *interrupt_source_number) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_xive_source) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->get_xive_source(phb, xive_num, interrupt_source_number); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_GET_XIVE_SOURCE, opal_get_xive_source, 3); + +static int64_t opal_get_msi_32(uint64_t phb_id, uint32_t mve_number, + uint32_t xive_num, uint8_t msi_range, + uint32_t *msi_address, uint32_t *message_data) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_msi_32) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->get_msi_32(phb, mve_number, xive_num, msi_range, + msi_address, message_data); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_GET_MSI_32, opal_get_msi_32, 6); + +static int64_t opal_get_msi_64(uint64_t phb_id, uint32_t mve_number, + uint32_t xive_num, uint8_t msi_range, + uint64_t *msi_address, uint32_t *message_data) +{ + struct phb *phb = pci_get_phb(phb_id); + 
int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_msi_64) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->get_msi_64(phb, mve_number, xive_num, msi_range, + msi_address, message_data); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_GET_MSI_64, opal_get_msi_64, 6); + +static int64_t opal_pci_map_pe_dma_window(uint64_t phb_id, uint16_t pe_number, + uint16_t window_id, + uint16_t tce_levels, + uint64_t tce_table_addr, + uint64_t tce_table_size, + uint64_t tce_page_size) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->map_pe_dma_window) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->map_pe_dma_window(phb, pe_number, window_id, + tce_levels, tce_table_addr, + tce_table_size, tce_page_size); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW, opal_pci_map_pe_dma_window, 7); + +static int64_t opal_pci_map_pe_dma_window_real(uint64_t phb_id, + uint16_t pe_number, + uint16_t window_id, + uint64_t pci_start_addr, + uint64_t pci_mem_size) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->map_pe_dma_window_real) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->map_pe_dma_window_real(phb, pe_number, window_id, + pci_start_addr, pci_mem_size); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, opal_pci_map_pe_dma_window_real, 5); + +static int64_t opal_pci_reset(uint64_t phb_id, uint8_t reset_scope, + uint8_t assert_state) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc = OPAL_SUCCESS; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops) + return OPAL_UNSUPPORTED; + if (assert_state != OPAL_ASSERT_RESET && + assert_state != OPAL_DEASSERT_RESET) + return OPAL_PARAMETER; + + phb->ops->lock(phb); + + switch(reset_scope) { + case OPAL_RESET_PHB_COMPLETE: + if (!phb->ops->complete_reset) { + rc = OPAL_UNSUPPORTED; + break; + } + + rc = phb->ops->complete_reset(phb, assert_state); + if (rc < 0) + prerror("PHB#%d: Failure on complete reset, rc=%lld\n", + phb->opal_id, rc); + break; + case OPAL_RESET_PCI_FUNDAMENTAL: + if (!phb->ops->fundamental_reset) { + rc = OPAL_UNSUPPORTED; + break; + } + + /* We need do nothing on deassert time */ + if (assert_state != OPAL_ASSERT_RESET) + break; + + rc = phb->ops->fundamental_reset(phb); + if (rc < 0) + prerror("PHB#%d: Failure on fundamental reset, rc=%lld\n", + phb->opal_id, rc); + break; + case OPAL_RESET_PCI_HOT: + if (!phb->ops->hot_reset) { + rc = OPAL_UNSUPPORTED; + break; + } + + /* We need do nothing on deassert time */ + if (assert_state != OPAL_ASSERT_RESET) + break; + + rc = phb->ops->hot_reset(phb); + if (rc < 0) + prerror("PHB#%d: Failure on hot reset, rc=%lld\n", + phb->opal_id, rc); + break; + case OPAL_RESET_PCI_IODA_TABLE: + if (assert_state != OPAL_ASSERT_RESET) + break; + if (phb->ops->ioda_reset) + phb->ops->ioda_reset(phb, true); + break; + default: + rc = OPAL_UNSUPPORTED; + } + phb->ops->unlock(phb); + pci_put_phb(phb); + + return (rc > 0) ? 
tb_to_msecs(rc) : rc; +} +opal_call(OPAL_PCI_RESET, opal_pci_reset, 3); + +static int64_t opal_pci_reinit(uint64_t phb_id, + uint64_t reinit_scope, + uint64_t data) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops || !phb->ops->pci_reinit) + return OPAL_UNSUPPORTED; + + phb->ops->lock(phb); + rc = phb->ops->pci_reinit(phb, reinit_scope, data); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_REINIT, opal_pci_reinit, 3); + +static int64_t opal_pci_poll(uint64_t phb_id) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops || !phb->ops->poll) + return OPAL_UNSUPPORTED; + + phb->ops->lock(phb); + rc = phb->ops->poll(phb); + phb->ops->unlock(phb); + pci_put_phb(phb); + + /* Return milliseconds for caller to sleep: round up */ + if (rc > 0) { + rc = tb_to_msecs(rc); + if (rc == 0) + rc = 1; + } + + return rc; +} +opal_call(OPAL_PCI_POLL, opal_pci_poll, 1); + +static int64_t opal_pci_set_phb_tce_memory(uint64_t phb_id, + uint64_t tce_mem_addr, + uint64_t tce_mem_size) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_phb_tce_memory) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->set_phb_tce_memory(phb, tce_mem_addr, tce_mem_size); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_SET_PHB_TCE_MEMORY, opal_pci_set_phb_tce_memory, 3); + +static int64_t opal_pci_get_phb_diag_data(uint64_t phb_id, + void *diag_buffer, + uint64_t diag_buffer_len) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_diag_data) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->get_diag_data(phb, diag_buffer, diag_buffer_len); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_GET_PHB_DIAG_DATA, opal_pci_get_phb_diag_data, 3); + +static int64_t opal_pci_get_phb_diag_data2(uint64_t phb_id, + void *diag_buffer, + uint64_t diag_buffer_len) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->get_diag_data2) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->get_diag_data2(phb, diag_buffer, diag_buffer_len); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_GET_PHB_DIAG_DATA2, opal_pci_get_phb_diag_data2, 3); + +static int64_t opal_pci_next_error(uint64_t phb_id, uint64_t *first_frozen_pe, + uint16_t *pci_error_type, uint16_t *severity) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->next_error) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + + /* Any call to this function clears the error event */ + opal_update_pending_evt(OPAL_EVENT_PCI_ERROR, 0); + rc = phb->ops->next_error(phb, first_frozen_pe, pci_error_type, + severity); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_NEXT_ERROR, opal_pci_next_error, 4); + +static int64_t opal_pci_eeh_freeze_status2(uint64_t phb_id, uint64_t pe_number, + uint8_t *freeze_state, + uint16_t *pci_error_type, + uint16_t *severity, + uint64_t *phb_status) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->eeh_freeze_status) + return OPAL_UNSUPPORTED; + phb->ops->lock(phb); + rc = phb->ops->eeh_freeze_status(phb, pe_number, 
freeze_state, + pci_error_type, severity, phb_status); + phb->ops->unlock(phb); + pci_put_phb(phb); + + return rc; +} +opal_call(OPAL_PCI_EEH_FREEZE_STATUS2, opal_pci_eeh_freeze_status2, 6); + +static int64_t opal_pci_set_phb_capi_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number) +{ + struct phb *phb = pci_get_phb(phb_id); + int64_t rc; + + if (!phb) + return OPAL_PARAMETER; + if (!phb->ops->set_capi_mode) + return OPAL_UNSUPPORTED; + if (mode == 1) { + phb->ops->lock(phb); + rc = phb->ops->set_capi_mode(phb, mode, pe_number); + phb->ops->unlock(phb); + return rc; + } + if (mode == 0) { + /* FIXME add support for PCI mode*/ + } + return OPAL_UNSUPPORTED; +} +opal_call(OPAL_PCI_SET_PHB_CAPI_MODE, opal_pci_set_phb_capi_mode, 3); diff --git a/core/pci.c b/core/pci.c new file mode 100644 index 0000000..f07908b --- /dev/null +++ b/core/pci.c @@ -0,0 +1,1388 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <pci.h> +#include <pci-cfg.h> +#include <timebase.h> +#include <lock.h> +#include <device.h> + +static struct lock pci_lock = LOCK_UNLOCKED; +#define PCI_MAX_PHBs 64 +static struct phb *phbs[PCI_MAX_PHBs]; + +#define DBG(fmt...) do { } while(0) + +/* + * Generic PCI utilities + */ + +/* pci_find_cap - Find a PCI capability in a device config space + * + * This will return a config space offset (positive) or a negative + * error (OPAL error codes). + * + * OPAL_UNSUPPORTED is returned if the capability doesn't exist + */ +int64_t pci_find_cap(struct phb *phb, uint16_t bdfn, uint8_t want) +{ + int64_t rc; + uint16_t stat, cap; + uint8_t pos, next; + + rc = pci_cfg_read16(phb, bdfn, PCI_CFG_STAT, &stat); + if (rc) + return rc; + if (!(stat & PCI_CFG_STAT_CAP)) + return OPAL_UNSUPPORTED; + rc = pci_cfg_read8(phb, bdfn, PCI_CFG_CAP, &pos); + if (rc) + return rc; + pos &= 0xfc; + while(pos) { + rc = pci_cfg_read16(phb, bdfn, pos, &cap); + if (rc) + return rc; + if ((cap & 0xff) == want) + return pos; + next = (cap >> 8) & 0xfc; + if (next == pos) { + prerror("PHB%d: dev %04x pci_find_cap hit a loop !\n", + phb->opal_id, bdfn); + break; + } + pos = next; + } + return OPAL_UNSUPPORTED; +} + +/* pci_find_ecap - Find a PCIe extended capability in a device + * config space + * + * This will return a config space offset (positive) or a negative + * error (OPAL error code). Additionally, if the "version" argument + * is non-NULL, the capability version will be returned there. 
+ *
+ * OPAL_UNSUPPORTED is returned if the capability doesn't exist
+ */
+int64_t pci_find_ecap(struct phb *phb, uint16_t bdfn, uint16_t want,
+		      uint8_t *version)
+{
+	int64_t rc;
+	uint32_t cap;
+	uint16_t off, prev = 0;
+
+	for (off = 0x100; off && off < 0x1000; off = (cap >> 20) & 0xffc) {
+		if (off == prev) {
+			prerror("PHB%d: dev %04x pci_find_ecap hit a loop !\n",
+				phb->opal_id, bdfn);
+			break;
+		}
+		prev = off;
+		rc = pci_cfg_read32(phb, bdfn, off, &cap);
+		if (rc)
+			return rc;
+		if ((cap & 0xffff) == want) {
+			if (version)
+				*version = (cap >> 16) & 0xf;
+			return off;
+		}
+	}
+	return OPAL_UNSUPPORTED;
+}
+
+static struct pci_device *pci_scan_one(struct phb *phb, struct pci_device *parent,
+				       uint16_t bdfn)
+{
+	struct pci_device *pd = NULL;
+	uint32_t retries, vdid, val;
+	int64_t rc, ecap;
+	uint8_t htype;
+	uint16_t capreg;
+	bool had_crs = false;
+
+	for (retries = 40; retries; retries--) {
+		rc = pci_cfg_read32(phb, bdfn, 0, &vdid);
+		if (rc)
+			return NULL;
+		if (vdid == 0xffffffff || vdid == 0x00000000)
+			return NULL;
+		if (vdid != 0xffff0001)
+			break;
+		had_crs = true;
+		time_wait_ms(100);
+	}
+	if (vdid == 0xffff0001) {
+		prerror("PCI: Device %04x CRS timeout !\n", bdfn);
+		return NULL;
+	}
+	if (had_crs)
+		printf("PCI: Device %04x replied after CRS\n", bdfn);
+	pd = zalloc(sizeof(struct pci_device));
+	if (!pd) {
+		prerror("PCI: Failed to allocate structure pci_device !\n");
+		goto fail;
+	}
+	pd->bdfn = bdfn;
+	pd->parent = parent;
+	list_head_init(&pd->children);
+	rc = pci_cfg_read8(phb, bdfn, PCI_CFG_HDR_TYPE, &htype);
+	if (rc) {
+		prerror("PCI: Failed to read header type !\n");
+		goto fail;
+	}
+	pd->is_multifunction = !!(htype & 0x80);
+	pd->is_bridge = (htype & 0x7f) != 0;
+	pd->scan_map = 0xffffffff; /* Default */
+
+	ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP);
+	if (ecap > 0) {
+		pci_set_cap(pd, PCI_CFG_CAP_ID_EXP, ecap, false);
+		pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_CAPABILITY_REG,
+			       &capreg);
+		pd->dev_type = GETFIELD(PCICAP_EXP_CAP_TYPE, capreg);
+
+		/*
+		 * XXX We observe a problem on some PLX switches where one
+		 * of the downstream ports appears as an upstream port; we
+		 * fix that up here, otherwise other code will misbehave
+		 */
+		if (pd->parent && pd->dev_type == PCIE_TYPE_SWITCH_UPPORT &&
+		    pd->parent->dev_type == PCIE_TYPE_SWITCH_UPPORT &&
+		    vdid == 0x874810b5) {
+			prerror("PCI: Fixing up bad PLX downstream port !\n");
+			pd->dev_type = PCIE_TYPE_SWITCH_DNPORT;
+		}
+
+		/* XXX Handle ARI */
+		if (pd->dev_type == PCIE_TYPE_SWITCH_DNPORT ||
+		    pd->dev_type == PCIE_TYPE_ROOT_PORT)
+			pd->scan_map = 0x1;
+
+		/* Read the MPS capability; the maximum payload size is 4096 */
+		pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_DEVCAP, &val);
+		pd->mps = (128 << GETFIELD(PCICAP_EXP_DEVCAP_MPSS, val));
+		if (pd->mps > 4096)
+			pd->mps = 4096;
+	} else {
+		pd->dev_type = PCIE_TYPE_LEGACY;
+	}
+
+	/* If it's a bridge, sanitize the bus numbers to avoid forwarding
+	 *
+	 * This will help when walking down those bridges later on
+	 */
+	if (pd->is_bridge) {
+		pci_cfg_write8(phb, bdfn, PCI_CFG_PRIMARY_BUS, bdfn >> 8);
+		pci_cfg_write8(phb, bdfn, PCI_CFG_SECONDARY_BUS, 0);
+		pci_cfg_write8(phb, bdfn, PCI_CFG_SUBORDINATE_BUS, 0);
+	}
+
+	/* XXX Need to do some basic setup, such as MPSS, MRS,
+	 * RCB, etc...
+	 */
+
+	printf("PCI: Device %04x VID:%04x DEV:%04x TYP:%d MF%s BR%s EX%s\n",
+	       bdfn, vdid & 0xffff, vdid >> 16, pd->dev_type,
+	       pd->is_multifunction ? "+" : "-",
+	       pd->is_bridge ? "+" : "-",
+	       pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) ? "+" : "-");
+
+	/*
+	 * Call PHB hook
+	 */
+	if (phb->ops->device_init)
+		phb->ops->device_init(phb, pd);
+
+	return pd;
+ fail:
+	if (pd)
+		free(pd);
+	return NULL;
+}
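The capability walk in pci_find_cap() above guards against a config space whose "next capability" pointer points back at itself; without that check the boot-time scan would spin forever on a malfunctioning device. Here is a minimal, self-contained sketch of the same guard run over a fake config space (find_cap() and the layout below are illustrative only, not part of this patch, and the real code additionally checks the capability-list bit in the status register first):

#include <stdint.h>
#include <stdio.h>

/* Fake 256-byte config space: capability list starts at 0x40, and the
 * MSI capability at 0x50 has a "next" pointer that loops onto itself. */
static uint8_t cfg[256] = {
	[0x34] = 0x40,                 /* capability pointer */
	[0x40] = 0x01, [0x41] = 0x50,  /* cap ID 0x01, next = 0x50 */
	[0x50] = 0x05, [0x51] = 0x50,  /* cap ID 0x05 (MSI), next = itself */
};

static int find_cap(uint8_t want)
{
	uint8_t pos = cfg[0x34] & 0xfc;

	while (pos) {
		uint8_t id = cfg[pos];
		uint8_t next = cfg[pos + 1] & 0xfc;

		if (id == want)
			return pos;
		if (next == pos)       /* same loop guard as pci_find_cap() */
			return -1;
		pos = next;
	}
	return -1;
}

int main(void)
{
	printf("MSI cap at 0x%x\n", find_cap(0x05));   /* finds 0x50 */
	printf("missing cap: %d\n", find_cap(0x10));   /* -1, no hang */
	return 0;
}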
"+" : "-"); + + /* + * Call PHB hook + */ + if (phb->ops->device_init) + phb->ops->device_init(phb, pd); + + return pd; + fail: + if (pd) + free(pd); + return NULL; +} + +/* pci_check_clear_freeze - Probing empty slot will result in an EEH + * freeze. Currently we have a single PE mapping + * everything (default state of our backend) so + * we just check and clear the state of PE#0 + * + * NOTE: We currently only handle simple PE freeze, not PHB fencing + * (or rather our backend does) + */ +static void pci_check_clear_freeze(struct phb *phb) +{ + int64_t rc; + uint8_t freeze_state; + uint16_t pci_error_type, sev; + + rc = phb->ops->eeh_freeze_status(phb, 0, &freeze_state, + &pci_error_type, &sev, NULL); + if (rc) + return; + if (freeze_state == OPAL_EEH_STOPPED_NOT_FROZEN) + return; + /* We can't handle anything worse than an ER here */ + if (sev > OPAL_EEH_SEV_NO_ERROR && + sev < OPAL_EEH_SEV_PE_ER) { + prerror("PCI: PHB%d fatal probe error !\n", phb->opal_id); + return; + } + phb->ops->eeh_freeze_clear(phb, 0, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); +} + +/* pci_enable_bridge - Called before scanning a bridge + * + * Ensures error flags are clean, disable master abort, and + * check if the subordinate bus isn't reset, the slot is enabled + * on PCIe, etc... + */ +static bool pci_enable_bridge(struct phb *phb, struct pci_device *pd) +{ + uint16_t bctl; + bool was_reset = false; + int64_t ecap = 0; + + /* Disable master aborts, clear errors */ + pci_cfg_read16(phb, pd->bdfn, PCI_CFG_BRCTL, &bctl); + bctl &= ~PCI_CFG_BRCTL_MABORT_REPORT; + pci_cfg_write16(phb, pd->bdfn, PCI_CFG_BRCTL, bctl); + + /* PCI-E bridge, check the slot state */ + if (pd->dev_type == PCIE_TYPE_ROOT_PORT || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT) { + uint16_t slctl, slcap, slsta, lctl; + + ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false); + + /* Read the slot status & check for presence detect */ + pci_cfg_read16(phb, pd->bdfn, ecap+PCICAP_EXP_SLOTSTAT, &slsta); + DBG(" slstat=%04x\n", slsta); + if (!(slsta & PCICAP_EXP_SLOTSTAT_PDETECTST)) { + printf("PCI: No card in slot\n"); + return false; + } + + /* Read the slot capabilities */ + pci_cfg_read16(phb, pd->bdfn, ecap+PCICAP_EXP_SLOTCAP, &slcap); + DBG(" slcap=%04x\n", slcap); + if (!(slcap & PCICAP_EXP_SLOTCAP_PWCTRL)) + goto power_is_on; + + /* Read the slot control register, check if the slot is off */ + pci_cfg_read16(phb, pd->bdfn, ecap+PCICAP_EXP_SLOTCTL, &slctl); + DBG(" slctl=%04x\n", slctl); + if (!(slctl & PCICAP_EXP_SLOTCTL_PWRCTLR)) + goto power_is_on; + + /* Turn power on + * + * XXX This is a "command", we should wait for it to complete + * etc... 
+/* Clean up bridge resources */
+static void pci_cleanup_bridge(struct phb *phb, struct pci_device *pd)
+{
+	uint16_t cmd;
+
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_BASE_U16, 0xffff);
+	pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_BASE, 0xf0);
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_IO_LIMIT_U16, 0);
+	pci_cfg_write8(phb, pd->bdfn, PCI_CFG_IO_LIMIT, 0);
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_BASE, 0xfff0);
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_MEM_LIMIT, 0);
+	pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE_U32, 0xffffffff);
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_BASE, 0xfff0);
+	pci_cfg_write32(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT_U32, 0);
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_PREF_MEM_LIMIT, 0);
+
+	/* Note: This is a bit fishy but since we have closed all the
+	 * bridge windows above, it shouldn't be a problem. Basically
+	 * we enable Memory, IO and Bus Master on the bridge because
+	 * some versions of Linux will fail to do it themselves.
+	 */
+	pci_cfg_read16(phb, pd->bdfn, PCI_CFG_CMD, &cmd);
+	cmd |= PCI_CFG_CMD_IO_EN | PCI_CFG_CMD_MEM_EN;
+	cmd |= PCI_CFG_CMD_BUS_MASTER_EN;
+	pci_cfg_write16(phb, pd->bdfn, PCI_CFG_CMD, cmd);
+}
+
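pci_cleanup_bridge() closes each bridge window by programming the base above the limit; a Type 1 bridge forwards nothing when base > limit, which is why enabling Memory/IO decode right afterwards is safe. A small standalone sketch decoding the 16-bit memory window registers written above (assumes the standard Type 1 header layout, where base/limit bits 15:4 are address bits 31:20 and the limit is inclusive):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t mem_base = 0xfff0, mem_limit = 0x0000; /* values written above */
	uint32_t lo = (uint32_t)(mem_base & 0xfff0) << 16;
	uint32_t hi = ((uint32_t)(mem_limit & 0xfff0) << 16) | 0xfffff;

	/* lo = 0xfff00000, hi = 0x000fffff: lo > hi, so the window
	 * forwards nothing until the OS reprograms it */
	printf("window %08x..%08x -> %s\n", lo, hi,
	       lo > hi ? "closed" : "open");
	return 0;
}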
+/* pci_scan - Perform a recursive scan of the bus at bus_number
+ *            populating the list passed as an argument. This also
+ *            performs the bus numbering, so it returns the largest
+ *            bus number that was assigned.
+ *
+ * Note: Eventually this might want to access some VPD information
+ *       in order to know what slots to scan and what not etc..
+ *
+ * XXX NOTE: We might want to enable ARI along the way...
+ *
+ * XXX NOTE: We might also want to set up the PCIe MPS/MRSS properly
+ *           here as Linux may or may not do it
+ */
+static uint8_t pci_scan(struct phb *phb, uint8_t bus, uint8_t max_bus,
+			struct list_head *list, struct pci_device *parent,
+			bool scan_downstream)
+{
+	struct pci_device *pd = NULL;
+	uint8_t dev, fn, next_bus, max_sub, save_max;
+	uint32_t scan_map;
+
+	/* Decide what to scan */
+	scan_map = parent ? parent->scan_map : phb->scan_map;
+
+	/* Do scan */
+	for (dev = 0; dev < 32; dev++) {
+		if (!(scan_map & (1ul << dev)))
+			continue;
+
+		/* Scan the device */
+		pd = pci_scan_one(phb, parent, (bus << 8) | (dev << 3));
+		pci_check_clear_freeze(phb);
+		if (!pd)
+			continue;
+
+		/* Get slot info if any */
+		if (platform.pci_get_slot_info)
+			platform.pci_get_slot_info(phb, pd);
+
+		/* Link it up */
+		list_add_tail(list, &pd->link);
+
+		/* XXX Handle ARI */
+		if (!pd->is_multifunction)
+			continue;
+		for (fn = 1; fn < 8; fn++) {
+			pd = pci_scan_one(phb, parent,
+					  ((uint16_t)bus << 8) | (dev << 3) | fn);
+			pci_check_clear_freeze(phb);
+			if (pd) {
+				if (platform.pci_get_slot_info)
+					platform.pci_get_slot_info(phb, pd);
+				list_add_tail(list, &pd->link);
+			}
+		}
+	}
+
+	/*
+	 * We only scan downstream if instructed to do so by the
+	 * caller. Typically we avoid the scan when we know the
+	 * link is down already, which happens for the top level
+	 * root complex, and avoids a long secondary timeout
+	 */
+	if (!scan_downstream)
+		return bus;
+
+	next_bus = bus + 1;
+	max_sub = bus;
+	save_max = max_bus;
+
+	/* Scan down bridges */
+	list_for_each(list, pd, link) {
+		bool use_max, do_scan;
+
+		if (!pd->is_bridge)
+			continue;
+
+		/* We need to figure out a new bus number to start from.
+		 *
+		 * This can be tricky due to our HW constraints which differ
+		 * from bridge to bridge so we are going to let the phb
+		 * driver decide what to do. This can return us a maximum
+		 * bus number to assign as well
+		 *
+		 * This function will:
+		 *
+		 *  - Return the bus number to use as secondary for the
+		 *    bridge or 0 for a failure
+		 *
+		 *  - "max_bus" will be adjusted to represent the max
+		 *    subordinate that can be associated with the downstream
+		 *    device
+		 *
+		 *  - "use_max" will be set to true if the returned max_bus
+		 *    *must* be used as the subordinate bus number of that
+		 *    bridge (when we need to give aligned powers of two on
+		 *    P7IOC). If it is set to false, we just adjust the
+		 *    subordinate bus number based on what we probed.
+		 */
+		max_bus = save_max;
+		next_bus = phb->ops->choose_bus(phb, pd, next_bus,
+						&max_bus, &use_max);
+
+		/* Configure the bridge with the returned values */
+		if (next_bus <= bus) {
+			printf("PCI: Bridge %04x, out of bus numbers !\n",
+			       pd->bdfn);
+			max_bus = next_bus = 0; /* Failure case */
+		}
+		pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, next_bus);
+		pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_bus);
+		if (!next_bus)
+			break;
+
+		printf("PCI: Bridge %04x, bus: %02x..%02x %s scanning...\n",
+		       pd->bdfn, next_bus, max_bus, use_max ? "[use max]" : "");
+
+		/* Clean up bridge resources */
+		pci_cleanup_bridge(phb, pd);
+
+		/* Configure the bridge. This will enable power to the slot
+		 * if it's currently disabled, lift reset, etc...
+		 *
+		 * Return false if we know there's nothing behind the bridge
+		 */
+		do_scan = pci_enable_bridge(phb, pd);
+
+		/* Perform recursive scan */
+		if (do_scan) {
+			max_sub = pci_scan(phb, next_bus, max_bus,
+					   &pd->children, pd, true);
+		} else if (!use_max) {
+			/* XXX Empty bridge... we leave room for hotplug
+			 * slots etc.. but we should be smarter at figuring
+			 * out if this is actually a hotpluggable one
+			 */
+			max_sub = next_bus + 4;
+			if (max_sub > max_bus)
+				max_sub = max_bus;
+		}
+
+		/* Update the max subordinate as described previously */
+		if (use_max)
+			max_sub = max_bus;
+		pci_cfg_write8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, max_sub);
+		next_bus = max_sub + 1;
+	}
+
+	return max_sub;
+}
+
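The numbering contract implemented by pci_scan() is: a bridge's secondary bus is the first bus behind it, and its subordinate bus is the highest bus anywhere in its subtree. The toy sketch below allocates sequentially over a fixed tree, which is what the code above degenerates to when use_max is false and choose_bus() imposes no alignment constraints (hypothetical standalone code, not skiboot's):

#include <stdio.h>

struct bridge {
	const char *name;
	struct bridge *child[4];
	int nchild;
	int secondary, subordinate;
};

/* Returns the highest bus number used in this subtree */
static int assign(struct bridge *b, int next_bus)
{
	int i, max_sub;

	b->secondary = next_bus;
	max_sub = next_bus;
	for (i = 0; i < b->nchild; i++)
		max_sub = assign(b->child[i], max_sub + 1);
	b->subordinate = max_sub;
	return max_sub;
}

int main(void)
{
	struct bridge leaf1 = { .name = "leaf1" }, leaf2 = { .name = "leaf2" };
	struct bridge sw = { .name = "switch", .child = { &leaf1, &leaf2 },
			     .nchild = 2 };
	struct bridge root = { .name = "root", .child = { &sw }, .nchild = 1 };

	assign(&root, 1); /* bus 0 is the root bus itself */
	/* Prints root 1..4, switch 2..4, leaf1 3..3, leaf2 4..4 */
	printf("%s: %d..%d\n", root.name, root.secondary, root.subordinate);
	printf("%s: %d..%d\n", sw.name, sw.secondary, sw.subordinate);
	printf("%s: %d..%d\n", leaf1.name, leaf1.secondary, leaf1.subordinate);
	printf("%s: %d..%d\n", leaf2.name, leaf2.secondary, leaf2.subordinate);
	return 0;
}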
+static int pci_get_mps(struct phb *phb,
+		       struct pci_device *pd, void *userdata)
+{
+	uint32_t *mps = (uint32_t *)userdata;
+
+	/* Only check PCI devices that have MPS capability */
+	if (phb && pd && pd->mps && *mps > pd->mps)
+		*mps = pd->mps;
+
+	return 0;
+}
+
+static int __pci_configure_mps(struct phb *phb,
+			       struct pci_device *pd,
+			       void *userdata __unused)
+{
+	uint32_t ecap, mps = phb->mps;
+	uint16_t val;
+
+	/* If the MPS isn't an acceptable one, bail immediately */
+	if (mps < 128 || mps > 4096)
+		return 1;
+
+	if (!phb || !pd)
+		return 0;
+
+	/* A PCIe device always has MPS capability */
+	if (pd->mps) {
+		ecap = pci_cap(pd, PCI_CFG_CAP_ID_EXP, false);
+		mps = ilog2(mps) - 7;
+
+		pci_cfg_read16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, &val);
+		val = SETFIELD(PCICAP_EXP_DEVCTL_MPS, val, mps);
+		pci_cfg_write16(phb, pd->bdfn, ecap + PCICAP_EXP_DEVCTL, val);
+	}
+
+	return 0;
+}
+
+int32_t pci_configure_mps(struct phb *phb, struct pci_device *pd)
+{
+	return __pci_configure_mps(phb, pd, NULL);
+}
+
+/*
+ * Check the power state. If the power is already on, issue a
+ * fundamental reset. Otherwise, power the slot on before issuing
+ * the fundamental reset.
+ */
+static int64_t pci_reset_phb(struct phb *phb)
+{
+	const char *desc;
+	int64_t rc;
+
+	rc = phb->ops->power_state(phb);
+	if (rc < 0) {
+		printf("PHB%d: Failed to get power state, rc=%lld\n",
+		       phb->opal_id, rc);
+		return rc;
+	}
+
+	if (rc == OPAL_SHPC_POWER_ON) {
+		desc = "fundamental reset";
+		rc = phb->ops->fundamental_reset(phb);
+	} else {
+		desc = "power on";
+		rc = phb->ops->slot_power_on(phb);
+	}
+
+	if (rc < 0) {
+		/* Don't warn if it's just an empty slot */
+		if (rc != OPAL_CLOSED)
+			printf("PHB%d: Failed to %s, rc=%lld\n",
+			       phb->opal_id, desc, rc);
+		return rc;
+	}
+
+	/* Wait for the internal state machine */
+	while (rc > 0) {
+		time_wait(rc);
+		rc = phb->ops->poll(phb);
+	}
+	if (rc < 0)
+		printf("PHB%d: Failed to %s, rc=%lld\n",
+		       phb->opal_id, desc, rc);
+
+	return rc;
+}
+
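__pci_configure_mps() above encodes the negotiated payload size as ilog2(mps) - 7, matching the PCIe Device Control MPS field where 0 means 128 bytes and each increment doubles the size up to 4096. A quick standalone check of that mapping (the local ilog2() stands in for skiboot's helper):

#include <stdio.h>

static unsigned int ilog2(unsigned int x)
{
	unsigned int l = 0;

	while (x >>= 1)
		l++;
	return l;
}

int main(void)
{
	unsigned int mps;

	/* Prints: 128 -> 0, 256 -> 1, 512 -> 2, 1024 -> 3, 2048 -> 4, 4096 -> 5 */
	for (mps = 128; mps <= 4096; mps <<= 1)
		printf("MPS %4u bytes -> field %u\n", mps, ilog2(mps) - 7);
	return 0;
}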
The reset + * state machine is going to wait for the link + */ + rc = pci_reset_phb(phb); + if (rc && rc != OPAL_CLOSED) + return; + + /* It's up, print some things */ + rc = phb->ops->link_state(phb); + if (rc < 0) { + printf("PHB%d: Failed to query link state, rc=%lld\n", + phb->opal_id, rc); + return; + } + has_link = rc != OPAL_SHPC_LINK_DOWN; + + if(!has_link) + printf("PHB%d: Link down\n", phb->opal_id); + else if (phb->phb_type >= phb_type_pcie_v1) + printf("PHB%d: Link up at x%lld width\n", phb->opal_id, rc); + + printf("PHB%d: Scanning (upstream%s)...\n", phb->opal_id, + has_link ? "+downsteam" : " only"); + pci_scan(phb, 0, 0xff, &phb->devices, NULL, has_link); + + /* Configre MPS (Max Payload Size) for PCIe domain */ + pci_walk_dev(phb, pci_get_mps, &mps); + phb->mps = mps; + pci_walk_dev(phb, __pci_configure_mps, NULL); +} + +int64_t pci_register_phb(struct phb *phb) +{ + int64_t rc = OPAL_SUCCESS; + unsigned int i; + + lock(&pci_lock); + for (i = 0; i < PCI_MAX_PHBs; i++) + if (!phbs[i]) + break; + if (i >= PCI_MAX_PHBs) { + prerror("PHB: Failed to find a free ID slot\n"); + rc = OPAL_RESOURCE; + } else { + phbs[i] = phb; + phb->opal_id = i; + dt_add_property_cells(phb->dt_node, "ibm,opal-phbid", + 0, phb->opal_id); + printf("PCI: Registered PHB ID %d\n", i); + } + list_head_init(&phb->devices); + unlock(&pci_lock); + + return rc; +} + +int64_t pci_unregister_phb(struct phb *phb) +{ + /* XXX We want some kind of RCU or RWlock to make things + * like that happen while no OPAL callback is in progress, + * that way we avoid taking a lock in each of them. + * + * Right now we don't unregister so we are fine + */ + lock(&pci_lock); + phbs[phb->opal_id] = phb; + unlock(&pci_lock); + + return OPAL_SUCCESS; +} + +struct phb *pci_get_phb(uint64_t phb_id) +{ + if (phb_id >= PCI_MAX_PHBs) + return NULL; + + /* XXX See comment in pci_unregister_phb() about locking etc... 
*/ + return phbs[phb_id]; +} + +static const char *pci_class_name(uint32_t class_code) +{ + uint8_t class = class_code >> 16; + uint8_t sub = (class_code >> 8) & 0xff; + uint8_t pif = class_code & 0xff; + + switch(class) { + case 0x00: + switch(sub) { + case 0x00: return "device"; + case 0x01: return "vga"; + } + break; + case 0x01: + switch(sub) { + case 0x00: return "scsi"; + case 0x01: return "ide"; + case 0x02: return "fdc"; + case 0x03: return "ipi"; + case 0x04: return "raid"; + case 0x05: return "ata"; + case 0x06: return "sata"; + case 0x07: return "sas"; + default: return "mass-storage"; + } + case 0x02: + switch(sub) { + case 0x00: return "ethernet"; + case 0x01: return "token-ring"; + case 0x02: return "fddi"; + case 0x03: return "atm"; + case 0x04: return "isdn"; + case 0x05: return "worldfip"; + case 0x06: return "picmg"; + default: return "network"; + } + case 0x03: + switch(sub) { + case 0x00: return "vga"; + case 0x01: return "xga"; + case 0x02: return "3d-controller"; + default: return "display"; + } + case 0x04: + switch(sub) { + case 0x00: return "video"; + case 0x01: return "sound"; + case 0x02: return "telephony"; + default: return "multimedia-device"; + } + case 0x05: + switch(sub) { + case 0x00: return "memory"; + case 0x01: return "flash"; + default: return "memory-controller"; + } + case 0x06: + switch(sub) { + case 0x00: return "host"; + case 0x01: return "isa"; + case 0x02: return "eisa"; + case 0x03: return "mca"; + case 0x04: return "pci"; + case 0x05: return "pcmcia"; + case 0x06: return "nubus"; + case 0x07: return "cardbus"; + case 0x08: return "raceway"; + case 0x09: return "semi-transparent-pci"; + case 0x0a: return "infiniband"; + default: return "unknown-bridge"; + } + case 0x07: + switch(sub) { + case 0x00: + switch(pif) { + case 0x01: return "16450-serial"; + case 0x02: return "16550-serial"; + case 0x03: return "16650-serial"; + case 0x04: return "16750-serial"; + case 0x05: return "16850-serial"; + case 0x06: return "16950-serial"; + default: return "serial"; + } + case 0x01: + switch(pif) { + case 0x01: return "bi-directional-parallel"; + case 0x02: return "ecp-1.x-parallel"; + case 0x03: return "ieee1284-controller"; + case 0xfe: return "ieee1284-device"; + default: return "parallel"; + } + case 0x02: return "multiport-serial"; + case 0x03: + switch(pif) { + case 0x01: return "16450-modem"; + case 0x02: return "16550-modem"; + case 0x03: return "16650-modem"; + case 0x04: return "16750-modem"; + default: return "modem"; + } + case 0x04: return "gpib"; + case 0x05: return "smart-card"; + default: return "communication-controller"; + } + case 0x08: + switch(sub) { + case 0x00: + switch(pif) { + case 0x01: return "isa-pic"; + case 0x02: return "eisa-pic"; + case 0x10: return "io-apic"; + case 0x20: return "iox-apic"; + default: return "interrupt-controller"; + } + case 0x01: + switch(pif) { + case 0x01: return "isa-dma"; + case 0x02: return "eisa-dma"; + default: return "dma-controller"; + } + case 0x02: + switch(pif) { + case 0x01: return "isa-system-timer"; + case 0x02: return "eisa-system-timer"; + default: return "timer"; + } + case 0x03: + switch(pif) { + case 0x01: return "isa-rtc"; + default: return "rtc"; + } + case 0x04: return "hotplug-controller"; + case 0x05: return "sd-host-controller"; + default: return "system-peripheral"; + } + case 0x09: + switch(sub) { + case 0x00: return "keyboard"; + case 0x01: return "pen"; + case 0x02: return "mouse"; + case 0x03: return "scanner"; + case 0x04: return "gameport"; + default: return 
"input-controller"; + } + case 0x0a: + switch(sub) { + case 0x00: return "clock"; + default: return "docking-station"; + } + case 0x0b: + switch(sub) { + case 0x00: return "386"; + case 0x01: return "486"; + case 0x02: return "pentium"; + case 0x10: return "alpha"; + case 0x20: return "powerpc"; + case 0x30: return "mips"; + case 0x40: return "co-processor"; + default: return "cpu"; + } + case 0x0c: + switch(sub) { + case 0x00: return "firewire"; + case 0x01: return "access-bus"; + case 0x02: return "ssa"; + case 0x03: + switch(pif) { + case 0x00: return "usb-uhci"; + case 0x10: return "usb-ohci"; + case 0x20: return "usb-ehci"; + case 0x30: return "usb-xhci"; + case 0xfe: return "usb-device"; + default: return "usb"; + } + case 0x04: return "fibre-channel"; + case 0x05: return "smb"; + case 0x06: return "infiniband"; + case 0x07: + switch(pif) { + case 0x00: return "impi-smic"; + case 0x01: return "impi-kbrd"; + case 0x02: return "impi-bltr"; + default: return "impi"; + } + case 0x08: return "secos"; + case 0x09: return "canbus"; + default: return "serial-bus"; + } + case 0x0d: + switch(sub) { + case 0x00: return "irda"; + case 0x01: return "consumer-ir"; + case 0x10: return "rf-controller"; + case 0x11: return "bluetooth"; + case 0x12: return "broadband"; + case 0x20: return "enet-802.11a"; + case 0x21: return "enet-802.11b"; + default: return "wireless-controller"; + } + case 0x0e: return "intelligent-controller"; + case 0x0f: + switch(sub) { + case 0x01: return "satellite-tv"; + case 0x02: return "satellite-audio"; + case 0x03: return "satellite-voice"; + case 0x04: return "satellite-data"; + default: return "satellite-device"; + } + case 0x10: + switch(sub) { + case 0x00: return "network-encryption"; + case 0x01: return "entertainment-encryption"; + default: return "encryption"; + } + case 0x011: + switch(sub) { + case 0x00: return "dpio"; + case 0x01: return "counter"; + case 0x10: return "measurement"; + case 0x20: return "management-card"; + default: return "data-processing"; + } + } + return "device"; +} + +void pci_std_swizzle_irq_map(struct dt_node *np, + struct pci_device *pd, + struct pci_lsi_state *lstate, + uint8_t swizzle) +{ + uint32_t *map, *p; + int dev, irq; + size_t map_size; + + /* Size in bytes of a target interrupt */ + size_t isize = lstate->int_size * sizeof(uint32_t); + + /* Calculate the size of a map entry: + * + * 3 cells : PCI Address + * 1 cell : PCI IRQ + * 1 cell : PIC phandle + * n cells : PIC irq (n = lstate->int_size) + * + * Assumption: PIC address is 0-size + */ + int esize = 3 + 1 + 1 + lstate->int_size; + + /* Number of map "device" entries + * + * A PCI Express root or downstream port needs only one + * entry for device 0. 
Anything else will get a full map + * for all possible 32 child device numbers + * + * If we have been passed a host bridge (pd == NULL) we also + * do a simple per-pin map + */ + int edevcount; + + if (!pd || (pd->dev_type == PCIE_TYPE_ROOT_PORT || + pd->dev_type == PCIE_TYPE_SWITCH_DNPORT)) { + edevcount = 1; + dt_add_property_cells(np, "interrupt-map-mask", 0, 0, 0, 7); + } else { + edevcount = 32; + dt_add_property_cells(np, "interrupt-map-mask", + 0xf800, 0, 0, 7); + } + map_size = esize * edevcount * 4 * sizeof(uint32_t); + map = p = zalloc(map_size); + + for (dev = 0; dev < edevcount; dev++) { + for (irq = 0; irq < 4; irq++) { + /* Calculate pin */ + uint32_t new_irq = (irq + dev + swizzle) % 4; + + /* PCI address portion */ + *(p++) = dev << (8 + 3); + *(p++) = 0; + *(p++) = 0; + + /* PCI interrupt portion */ + *(p++) = irq + 1; + + /* Parent phandle */ + *(p++) = lstate->int_parent[new_irq]; + + /* Parent desc */ + memcpy(p, lstate->int_val[new_irq], isize); + p += lstate->int_size; + } + } + + dt_add_property(np, "interrupt-map", map, map_size); + free(map); +} + +static void pci_add_slot_properties(struct phb *phb, struct pci_slot_info *info, + struct dt_node *np) +{ + char loc_code[LOC_CODE_SIZE]; + size_t base_loc_code_len, slot_label_len; + + if (phb->base_loc_code) { + base_loc_code_len = strlen(phb->base_loc_code); + slot_label_len = strlen(info->label); + if ((base_loc_code_len + slot_label_len +1) < LOC_CODE_SIZE) { + strcpy(loc_code, phb->base_loc_code); + strcat(loc_code, "-"); + strcat(loc_code, info->label); + dt_add_property(np, "ibm,slot-location-code", + loc_code, strlen(loc_code) + 1); + } else + prerror("PCI: Loc Code too long - %zu + %zu + 1\n", + base_loc_code_len, slot_label_len); + } else + DBG("PCI: Base Loc code not found...\n"); + + /* Add other slot information */ + dt_add_property_cells(np, "ibm,slot-pluggable", info->pluggable); + dt_add_property_cells(np, "ibm,slot-power-ctl", info->power_ctl); + dt_add_property_cells(np, "ibm,slot-wired-lanes", info->wired_lanes); + /*dt_add_property(np, "ibm,slot-bus-clock", &pd->slot_info->bus_clock, sizeof(uint8_t));*/ + dt_add_property_cells(np, "ibm,slot-connector-type", info->connector_type); + dt_add_property_cells(np, "ibm,slot-card-desc", info->card_desc); + dt_add_property_cells(np, "ibm,slot-card-mech", info->card_mech); + dt_add_property_cells(np, "ibm,slot-pwr-led-ctl", info->pwr_led_ctl); + dt_add_property_cells(np, "ibm,slot-attn-led-ctl", info->attn_led_ctl); + dt_add_property_string(np, "ibm,slot-label", info->label); +} + +static void pci_add_loc_code(struct dt_node *np) +{ + struct dt_node *p = np->parent; + const char *blcode = NULL; + + /* Look for a parent with a slot-location-code */ + while (p && !blcode) { + blcode = dt_prop_get_def(p, "ibm,slot-location-code", NULL); + p = p->parent; + } + if (!blcode) + return; + dt_add_property_string(np, "ibm,loc-code", blcode); +} + +static void pci_print_summary_line(struct phb *phb, struct pci_device *pd, + struct dt_node *np, u32 rev_class, + const char *cname) +{ + const char *label, *dtype, *s; + u32 vdid; +#define MAX_SLOTSTR 32 + char slotstr[MAX_SLOTSTR + 1] = { 0, }; + + pci_cfg_read32(phb, pd->bdfn, 0, &vdid); + + /* If it's a slot, it has a slot-label */ + label = dt_prop_get_def(np, "ibm,slot-label", NULL); + if (label) { + u32 lanes = dt_prop_get_u32_def(np, "ibm,slot-wired-lanes", 0); + static const char *lanestrs[] = { + "", " x1", " x2", " x4", " x8", "x16", "x32", "32b", "64b" + }; + const char *lstr = lanes > PCI_SLOT_WIRED_LANES_PCIX_64 
? "" : lanestrs[lanes]; + snprintf(slotstr, MAX_SLOTSTR, "SLOT=%3s %s", label, lstr); + /* XXX Add more slot info */ + } else { + /* + * No label, ignore downstream switch legs and root complex, + * Those would essentially be non-populated + */ + if (pd->dev_type != PCIE_TYPE_ROOT_PORT && + pd->dev_type != PCIE_TYPE_SWITCH_DNPORT) { + /* It's a mere device, get loc code */ + s = dt_prop_get_def(np, "ibm,loc-code", NULL); + if (s) + snprintf(slotstr, MAX_SLOTSTR, "LOC_CODE=%s", s); + } + } + + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + static const char *pcie_types[] = { + "EP ", "LGCY", "????", "????", "ROOT", "SWUP", "SWDN", + "ETOX", "XTOE", "RINT", "EVTC" }; + if (pd->dev_type >= ARRAY_SIZE(pcie_types)) + dtype = "????"; + else + dtype = pcie_types[pd->dev_type]; + } else + dtype = pd->is_bridge ? "PCIB" : "PCID"; + + if (pd->is_bridge) { + uint8_t sec_bus, sub_bus; + pci_cfg_read8(phb, pd->bdfn, PCI_CFG_SECONDARY_BUS, &sec_bus); + pci_cfg_read8(phb, pd->bdfn, PCI_CFG_SUBORDINATE_BUS, &sub_bus); + printf(" %04x:%02x:%02x.%x [%s] %04x %04x R:%02x C:%06x B:%02x..%02x %s\n", + phb->opal_id, pd->bdfn >> 8, (pd->bdfn >> 3) & 0x1f, + pd->bdfn & 0x7, dtype, vdid & 0xffff, vdid >> 16, + rev_class & 0xff, rev_class >> 8, sec_bus, sub_bus, slotstr); + } else + printf(" %04x:%02x:%02x.%x [%s] %04x %04x R:%02x C:%06x (%14s) %s\n", + phb->opal_id, pd->bdfn >> 8, (pd->bdfn >> 3) & 0x1f, + pd->bdfn & 0x7, dtype, vdid & 0xffff, vdid >> 16, + rev_class & 0xff, rev_class >> 8, cname, slotstr); +} + + +static void pci_add_one_node(struct phb *phb, struct pci_device *pd, + struct dt_node *parent_node, + struct pci_lsi_state *lstate, uint8_t swizzle) +{ + struct pci_device *child; + struct dt_node *np; + const char *cname; +#define MAX_NAME 256 + char name[MAX_NAME]; + char compat[MAX_NAME]; + uint32_t rev_class, vdid; + uint32_t reg[5]; + uint8_t intpin; + + pci_cfg_read32(phb, pd->bdfn, 0, &vdid); + pci_cfg_read32(phb, pd->bdfn, PCI_CFG_REV_ID, &rev_class); + pci_cfg_read8(phb, pd->bdfn, PCI_CFG_INT_PIN, &intpin); + + /* + * Quirk for IBM bridge bogus class on PCIe root complex. + * Without it, the PCI DN won't be created for its downstream + * devices in Linux. + */ + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false) && + parent_node == phb->dt_node) + rev_class = (rev_class & 0xff) | 0x6040000; + cname = pci_class_name(rev_class >> 8); + + if (pd->bdfn & 0x7) + snprintf(name, MAX_NAME - 1, "%s@%x,%x", + cname, (pd->bdfn >> 3) & 0x1f, pd->bdfn & 0x7); + else + snprintf(name, MAX_NAME - 1, "%s@%x", + cname, (pd->bdfn >> 3) & 0x1f); + np = dt_new(parent_node, name); + + /* XXX FIXME: make proper "compatible" properties */ + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) { + snprintf(compat, MAX_NAME, "pciex%x,%x", + vdid & 0xffff, vdid >> 16); + dt_add_property_cells(np, "ibm,pci-config-space-type", 1); + } else { + snprintf(compat, MAX_NAME, "pci%x,%x", + vdid & 0xffff, vdid >> 16); + dt_add_property_cells(np, "ibm,pci-config-space-type", 0); + } + dt_add_property_cells(np, "class-code", rev_class >> 8); + dt_add_property_cells(np, "revision-id", rev_class & 0xff); + dt_add_property_cells(np, "vendor-id", vdid & 0xffff); + dt_add_property_cells(np, "device-id", vdid >> 16); + if (intpin) + dt_add_property_cells(np, "interrupts", intpin); + + /* XXX FIXME: Add a few missing ones such as + * + * - devsel-speed (!express) + * - max-latency + * - min-grant + * - subsystem-id + * - subsystem-vendor-id + * - ... 
+ */ + + /* Add slot properties if needed */ + if (pd->slot_info) + pci_add_slot_properties(phb, pd->slot_info, np); + + /* Make up location code */ + pci_add_loc_code(np); + + /* XXX FIXME: We don't look for BARs, we only put the config space + * entry in the "reg" property. That's enough for Linux and we might + * even want to make this legit in future ePAPR + */ + reg[0] = pd->bdfn << 8; + reg[1] = reg[2] = reg[3] = reg[4] = 0; + dt_add_property(np, "reg", reg, sizeof(reg)); + + /* Print summary info about the device */ + pci_print_summary_line(phb, pd, np, rev_class, cname); + + if (!pd->is_bridge) + return; + + dt_add_property_cells(np, "#address-cells", 3); + dt_add_property_cells(np, "#size-cells", 2); + dt_add_property_cells(np, "#interrupt-cells", 1); + + /* We want "device_type" for bridges */ + if (pci_has_cap(pd, PCI_CFG_CAP_ID_EXP, false)) + dt_add_property_string(np, "device_type", "pciex"); + else + dt_add_property_string(np, "device_type", "pci"); + + /* Update the current interrupt swizzling level based on our own + * device number + */ + swizzle = (swizzle + ((pd->bdfn >> 3) & 0x1f)) & 3; + + /* We generate a standard-swizzling interrupt map. This is pretty + * big, we *could* try to be smarter for things that aren't hotplug + * slots at least and only populate those entries for which there's + * an actual children (especially on PCI Express), but for now that + * will do + */ + pci_std_swizzle_irq_map(np, pd, lstate, swizzle); + + /* We do an empty ranges property for now, we haven't setup any + * bridge windows, the kernel will deal with that + * + * XXX The kernel should probably fix that up + */ + dt_add_property(np, "ranges", NULL, 0); + + list_for_each(&pd->children, child, link) + pci_add_one_node(phb, child, np, lstate, swizzle); +} + +static void pci_add_nodes(struct phb *phb) +{ + struct pci_lsi_state *lstate = &phb->lstate; + struct pci_device *pd; + + /* If the PHB has its own slot info, add them */ + if (phb->slot_info) + pci_add_slot_properties(phb, phb->slot_info, NULL); + + /* Add all child devices */ + list_for_each(&phb->devices, pd, link) + pci_add_one_node(phb, pd, phb->dt_node, lstate, 0); +} + +static void __pci_reset(struct list_head *list) +{ + struct pci_device *pd; + + while ((pd = list_pop(list, struct pci_device, link)) != NULL) { + __pci_reset(&pd->children); + free(pd); + } +} + +void pci_reset(void) +{ + unsigned int i; + + printf("PCI: Clearing all devices...\n"); + + lock(&pci_lock); + + /* XXX Do those in parallel (at least the power up + * state machine could be done in parallel) + */ + for (i = 0; i < PCI_MAX_PHBs; i++) { + if (!phbs[i]) + continue; + __pci_reset(&phbs[i]->devices); + } + unlock(&pci_lock); +} + +void pci_init_slots(void) +{ + unsigned int i; + + printf("PCI: Probing PHB slots...\n"); + + lock(&pci_lock); + + /* XXX Do those in parallel (at least the power up + * state machine could be done in parallel) + */ + for (i = 0; i < PCI_MAX_PHBs; i++) { + if (!phbs[i]) + continue; + pci_init_slot(phbs[i]); + } + + if (platform.pci_probe_complete) + platform.pci_probe_complete(); + + printf("PCI: Summary\n"); + for (i = 0; i < PCI_MAX_PHBs; i++) { + if (!phbs[i]) + continue; + pci_add_nodes(phbs[i]); + } + unlock(&pci_lock); +} + +static struct pci_device *__pci_walk_dev(struct phb *phb, + struct list_head *l, + int (*cb)(struct phb *, + struct pci_device *, + void *), + void *userdata) +{ + struct pci_device *pd, *child; + + if (list_empty(l)) + return NULL; + + list_for_each(l, pd, link) { + if (cb && cb(phb, pd, userdata)) + 
return pd; + + child = __pci_walk_dev(phb, &pd->children, cb, userdata); + if (child) + return child; + } + + return NULL; +} + +struct pci_device *pci_walk_dev(struct phb *phb, + int (*cb)(struct phb *, + struct pci_device *, + void *), + void *userdata) +{ + return __pci_walk_dev(phb, &phb->devices, cb, userdata); +} + +static int __pci_find_dev(struct phb *phb, + struct pci_device *pd, void *userdata) +{ + uint16_t bdfn = *((uint16_t *)userdata); + + if (!phb || !pd) + return 0; + + if (pd->bdfn == bdfn) + return 1; + + return 0; +} + +struct pci_device *pci_find_dev(struct phb *phb, uint16_t bdfn) +{ + return pci_walk_dev(phb, __pci_find_dev, &bdfn); +} diff --git a/core/platform.c b/core/platform.c new file mode 100644 index 0000000..e54b334 --- /dev/null +++ b/core/platform.c @@ -0,0 +1,78 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#include <skiboot.h> +#include <opal.h> +#include <console.h> + +/* + * Various wrappers for platform functions + */ +static int64_t opal_cec_power_down(uint64_t request) +{ + printf("OPAL: Shutdown request type 0x%llx...\n", request); + + if (platform.cec_power_down) + return platform.cec_power_down(request); + + return OPAL_SUCCESS; +} +opal_call(OPAL_CEC_POWER_DOWN, opal_cec_power_down, 1); + +static int64_t opal_cec_reboot(void) +{ + printf("OPAL: Reboot request...\n"); + +#ifdef ENABLE_FAST_RESET + /* Try a fast reset first */ + fast_reset(); +#endif + if (platform.cec_reboot) + return platform.cec_reboot(); + + return OPAL_SUCCESS; +} +opal_call(OPAL_CEC_REBOOT, opal_cec_reboot, 0); + +static void generic_platform_init(void) +{ + /* Do we want to unconditionally enable it ? */ + if (dummy_console_enabled()) + dummy_console_add_nodes(); +} + +static struct platform generic_platform = { + .name = "generic", + .init = generic_platform_init, +}; + +void probe_platform(void) +{ + struct platform *platforms = &__platforms_start; + unsigned int i; + + platform = generic_platform; + + for (i = 0; &platforms[i] < &__platforms_end; i++) { + if (platforms[i].probe && platforms[i].probe()) { + platform = platforms[i]; + break; + } + } + + printf("PLAT: Detected %s platform\n", platform.name); +} diff --git a/core/relocate.c b/core/relocate.c new file mode 100644 index 0000000..f6bda37 --- /dev/null +++ b/core/relocate.c @@ -0,0 +1,65 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <stdbool.h> +#include <elf.h> + +/* WARNING: This code is used to self-relocate, it cannot have any + * global reference nor TOC reference. It's also called before BSS + * is cleared. + */ + +/* Called from head.S, thus no header. */ +int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela); + +/* Note: This code is simplified according to the assumptions + * that our link address is 0 and we are running at the + * target address already. + */ +int relocate(uint64_t offset, struct elf64_dyn *dyn, struct elf64_rela *rela) +{ + uint64_t dt_rela = 0; + uint64_t dt_relacount = 0; + unsigned int i; + + /* Look for relocation table */ + for (; dyn->d_tag != DT_NULL; dyn++) { + if (dyn->d_tag == DT_RELA) + dt_rela = dyn->d_val; + else if (dyn->d_tag == DT_RELACOUNT) + dt_relacount = dyn->d_val; + } + + /* If we miss either rela or relacount, bail */ + if (!dt_rela || !dt_relacount) + return false; + + /* Check if the offset is consistent */ + if ((offset + dt_rela) != (uint64_t)rela) + return false; + + /* Perform relocations */ + for (i = 0; i < dt_relacount; i++, rela++) { + uint64_t *t; + + if (ELF64_R_TYPE(rela->r_info) != R_PPC64_RELATIVE) + return false; + t = (uint64_t *)(rela->r_offset + offset); + *t = rela->r_addend + offset; + } + + return true; +} diff --git a/core/test/Makefile.check b/core/test/Makefile.check new file mode 100644 index 0000000..37dac46 --- /dev/null +++ b/core/test/Makefile.check @@ -0,0 +1,29 @@ +# -*-Makefile-*- +CORE_TEST := core/test/run-device core/test/run-mem_region core/test/run-malloc core/test/run-malloc-speed core/test/run-mem_region_init core/test/run-mem_region_release_unused core/test/run-mem_region_release_unused_noalloc core/test/run-trace core/test/run-msg + +check: $(CORE_TEST:%=%-check) + +$(CORE_TEST:%=%-check) : %-check: % + $(VALGRIND) $< + +core/test/stubs.o: core/test/stubs.c + $(HOSTCC) $(HOSTCFLAGS) -g -c -o $@ $< + +$(CORE_TEST) : core/test/stubs.o + +$(CORE_TEST) : % : %.c + $(HOSTCC) $(HOSTCFLAGS) -O0 -g -I include -I . -I libfdt -o $@ $< core/test/stubs.o + +$(CORE_TEST): % : %.d + +core/test/stubs.o: core/test/stubs.d + +core/test/%.d: core/test/%.c + $(HOSTCC) $(HOSTCFLAGS) -I include -I . -I libfdt -M $< > $@ + +-include core/test/*.d + +clean: core-test-clean + +core-test-clean: + $(RM) -f core/test/*.[od] $(CORE_TEST) diff --git a/core/test/run-device.c b/core/test/run-device.c new file mode 100644 index 0000000..fa9e951 --- /dev/null +++ b/core/test/run-device.c @@ -0,0 +1,118 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> + +/* Override this for testing. 
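+ * device.c decides with is_rodata() whether a node name can be referenced in place or must be copied; pointing it at a small fake 16-byte rodata window lets the test below exercise both paths.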
*/ +#define is_rodata(p) fake_is_rodata(p) + +char __rodata_start[16]; +#define __rodata_end (__rodata_start + sizeof(__rodata_start)) + +static inline bool fake_is_rodata(const void *p) +{ + return ((char *)p >= __rodata_start && (char *)p < __rodata_end); +} + +#define zalloc(bytes) calloc((bytes), 1) + +#include "../device.c" +#include "../../ccan/list/list.c" /* For list_check */ +#include <assert.h> + +int main(void) +{ + struct dt_node *root, *c1, *c2, *gc1, *gc2, *gc3, *ggc1, *i; + const struct dt_property *p; + struct dt_property *p2; + unsigned int n; + + root = dt_new_root("root"); + assert(!list_top(&root->properties, struct dt_property, list)); + c1 = dt_new(root, "c1"); + assert(!list_top(&c1->properties, struct dt_property, list)); + c2 = dt_new(root, "c2"); + assert(!list_top(&c2->properties, struct dt_property, list)); + gc1 = dt_new(c1, "gc1"); + assert(!list_top(&gc1->properties, struct dt_property, list)); + gc2 = dt_new(c1, "gc2"); + assert(!list_top(&gc2->properties, struct dt_property, list)); + gc3 = dt_new(c1, "gc3"); + assert(!list_top(&gc3->properties, struct dt_property, list)); + ggc1 = dt_new(gc1, "ggc1"); + assert(!list_top(&ggc1->properties, struct dt_property, list)); + + for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) { + assert(!list_top(&i->properties, struct dt_property, list)); + dt_add_property_cells(i, "visited", 1); + } + assert(n == 6); + + for (n = 0, i = dt_first(root); i; i = dt_next(root, i), n++) { + p = list_top(&i->properties, struct dt_property, list); + assert(strcmp(p->name, "visited") == 0); + assert(p->len == sizeof(u32)); + assert(fdt32_to_cpu(*(u32 *)p->prop) == 1); + } + assert(n == 6); + + dt_add_property_cells(c1, "some-property", 1, 2, 3); + p = dt_find_property(c1, "some-property"); + assert(p); + assert(strcmp(p->name, "some-property") == 0); + assert(p->len == sizeof(u32) * 3); + assert(fdt32_to_cpu(*(u32 *)p->prop) == 1); + assert(fdt32_to_cpu(*((u32 *)p->prop + 1)) == 2); + assert(fdt32_to_cpu(*((u32 *)p->prop + 2)) == 3); + + /* Test freeing a single node */ + assert(!list_empty(&gc1->children)); + dt_free(ggc1); + assert(list_empty(&gc1->children)); + + /* Test rodata logic. */ + assert(!is_rodata("hello")); + assert(is_rodata(__rodata_start)); + strcpy(__rodata_start, "name"); + ggc1 = dt_new(root, __rodata_start); + assert(ggc1->name == __rodata_start); + + /* Test string node. */ + dt_add_property_string(ggc1, "somestring", "someval"); + assert(dt_has_node_property(ggc1, "somestring", "someval")); + assert(!dt_has_node_property(ggc1, "somestrin", "someval")); + assert(!dt_has_node_property(ggc1, "somestring", "someva")); + assert(!dt_has_node_property(ggc1, "somestring", "somevale")); + + /* Test resizing property. */ + p = p2 = __dt_find_property(c1, "some-property"); + assert(p); + n = p2->len; + while (p2 == p) { + n *= 2; + dt_resize_property(&p2, n); + } + + assert(dt_find_property(c1, "some-property") == p2); + list_check(&c1->properties, "properties after resizing"); + + dt_del_property(c1, p2); + list_check(&c1->properties, "properties after delete"); + + /* No leaks for valgrind! */ + dt_free(root); + return 0; +} diff --git a/core/test/run-malloc-speed.c b/core/test/run-malloc-speed.c new file mode 100644 index 0000000..edc7589 --- /dev/null +++ b/core/test/run-malloc-speed.c @@ -0,0 +1,94 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) +/* Don't include this, it's PPC-specific */ +#define __CPU_H +static unsigned int cpu_max_pir = 1; +struct cpu_thread { + unsigned int chip_id; +}; + +#include <stdlib.h> + +/* Use these before we undefine them below. */ +static inline void *real_malloc(size_t size) +{ + return malloc(size); +} + +static inline void real_free(void *p) +{ + return free(p); +} + +#include <skiboot.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../malloc.c" +#include "../mem_region.c" +#include "../device.c" + +#undef malloc +#undef free +#undef realloc + +#include <assert.h> +#include <stdio.h> + +char __rodata_start[1], __rodata_end[1]; +struct dt_node *dt_root; + +void lock(struct lock *l) +{ + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +#define TEST_HEAP_ORDER 27 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +#define NUM_ALLOCS 4096 + +int main(void) +{ + uint64_t i, len; + void *p[NUM_ALLOCS]; + + /* Use malloc for the heap, so valgrind can find issues. */ + skiboot_heap.start = (unsigned long)real_malloc(skiboot_heap.len); + + len = skiboot_heap.len / NUM_ALLOCS - sizeof(struct alloc_hdr); + for (i = 0; i < NUM_ALLOCS; i++) { + p[i] = __malloc(len, __location__); + assert(p[i] > region_start(&skiboot_heap)); + assert(p[i] + len <= region_start(&skiboot_heap) + + skiboot_heap.len); + } + assert(mem_check(&skiboot_heap)); + assert(mem_region_lock.lock_val == 0); + free(region_start(&skiboot_heap)); + return 0; +} diff --git a/core/test/run-malloc.c b/core/test/run-malloc.c new file mode 100644 index 0000000..226ce75 --- /dev/null +++ b/core/test/run-malloc.c @@ -0,0 +1,144 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) + +/* Don't include this, it's PPC-specific */ +#define __CPU_H +static unsigned int cpu_max_pir = 1; +struct cpu_thread { + unsigned int chip_id; +}; + +#include <skiboot.h> + +#define is_rodata(p) true + +#include "../mem_region.c" +#include "../malloc.c" +#include "../device.c" + +#include "mem_region-malloc.h" + +#define TEST_HEAP_ORDER 12 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +struct dt_node *dt_root; + +void lock(struct lock *l) +{ + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +static bool heap_empty(void) +{ + const struct alloc_hdr *h = region_start(&skiboot_heap); + return h->num_longs == skiboot_heap.len / sizeof(long); +} + +int main(void) +{ + char test_heap[TEST_HEAP_SIZE], *p, *p2, *p3, *p4; + size_t i; + + /* Use a stack buffer for the heap; valgrind can still catch + * out-of-bounds accesses within it. */ + skiboot_heap.start = (unsigned long)test_heap; + skiboot_heap.len = TEST_HEAP_SIZE; + + /* Allocations of various sizes. */ + for (i = 0; i < TEST_HEAP_ORDER; i++) { + p = malloc(1ULL << i); + assert(p); + assert(p > (char *)test_heap); + assert(p + (1ULL << i) <= (char *)test_heap + TEST_HEAP_SIZE); + assert(!mem_region_lock.lock_val); + free(p); + assert(!mem_region_lock.lock_val); + assert(heap_empty()); + } + + /* Realloc as malloc. */ + mem_region_lock.lock_val = 0; + p = realloc(NULL, 100); + assert(p); + assert(!mem_region_lock.lock_val); + + /* Realloc as free. */ + p = realloc(p, 0); + assert(!p); + assert(!mem_region_lock.lock_val); + assert(heap_empty()); + + /* Realloc longer. */ + p = realloc(NULL, 100); + assert(p); + assert(!mem_region_lock.lock_val); + p2 = realloc(p, 200); + assert(p2 == p); + assert(!mem_region_lock.lock_val); + free(p); + assert(!mem_region_lock.lock_val); + assert(heap_empty()); + + /* Realloc shorter. */ + mem_region_lock.lock_val = 0; + p = realloc(NULL, 100); + assert(!mem_region_lock.lock_val); + assert(p); + p2 = realloc(p, 1); + assert(!mem_region_lock.lock_val); + assert(p2 == p); + free(p); + assert(!mem_region_lock.lock_val); + assert(heap_empty()); + + /* Realloc with move. */ + p2 = malloc(TEST_HEAP_SIZE - 64 - sizeof(struct alloc_hdr)*2); + assert(p2); + p = malloc(64); + assert(p); + free(p2); + + p2 = realloc(p, 128); + assert(p2 != p); + free(p2); + assert(heap_empty()); + assert(!mem_region_lock.lock_val); + + /* Reproduce bug BZ109128/SW257364 (keep the result so the later + * free(p) stays valid even if the block moves) */ + p = malloc(100); + p2 = malloc(100); + p3 = malloc(100); + p4 = malloc(100); + free(p2); + p = realloc(p, 216); + free(p3); + free(p); + free(p4); + assert(heap_empty()); + assert(!mem_region_lock.lock_val); + + return 0; +} diff --git a/core/test/run-mem_region.c b/core/test/run-mem_region.c new file mode 100644 index 0000000..f0ad2c2 --- /dev/null +++ b/core/test/run-mem_region.c @@ -0,0 +1,250 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) +/* Don't include this, it's PPC-specific */ +#define __CPU_H +static unsigned int cpu_max_pir = 1; +struct cpu_thread { + unsigned int chip_id; +}; + +#include <stdlib.h> +#include <string.h> + +/* Use these before we override definitions below. */ +static void *__malloc(size_t size, const char *location __attribute__((unused))) +{ + return malloc(size); +} + +static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused))) +{ + return realloc(ptr, size); +} + +static inline void __free(void *p, const char *location __attribute__((unused))) +{ + return free(p); +} + +static void *__zalloc(size_t size, const char *location __attribute__((unused))) +{ + void *ptr = malloc(size); + memset(ptr, 0, size); + return ptr; +} + +#include <skiboot.h> + +#define is_rodata(p) true + +#include "../mem_region.c" +#include "../device.c" + +#include <assert.h> +#include <stdio.h> + +struct dt_node *dt_root; + +void lock(struct lock *l) +{ + l->lock_val++; +} + +void unlock(struct lock *l) +{ + l->lock_val--; +} + +#define TEST_HEAP_ORDER 12 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static bool heap_empty(void) +{ + const struct alloc_hdr *h = region_start(&skiboot_heap); + return h->num_longs == skiboot_heap.len / sizeof(long); +} + +int main(void) +{ + char *test_heap; + void *p, *ptrs[100]; + size_t i; + struct mem_region *r; + + /* Use malloc for the heap, so valgrind can find issues. */ + test_heap = __malloc(TEST_HEAP_SIZE, __location__); + skiboot_heap.start = (unsigned long)test_heap; + skiboot_heap.len = TEST_HEAP_SIZE; + + /* Allocations of various sizes. */ + for (i = 0; i < TEST_HEAP_ORDER; i++) { + p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here"); + assert(p); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "here")); + assert(p > (void *)test_heap); + assert(p + (1ULL << i) <= (void *)test_heap + TEST_HEAP_SIZE); + assert(mem_size(&skiboot_heap, p) >= 1ULL << i); + mem_free(&skiboot_heap, p, "freed"); + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "freed")); + } + p = mem_alloc(&skiboot_heap, 1ULL << i, 1, "here"); + assert(!p); + mem_free(&skiboot_heap, p, "freed"); + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + + /* Allocations of various alignments: use small alloc first. */ + ptrs[0] = mem_alloc(&skiboot_heap, 1, 1, "small"); + for (i = 0; ; i++) { + p = mem_alloc(&skiboot_heap, 1, 1ULL << i, "here"); + assert(mem_check(&skiboot_heap)); + /* We will eventually fail... */ + if (!p) { + assert(i >= TEST_HEAP_ORDER); + break; + } + assert(p); + assert((long)p % (1ULL << i) == 0); + assert(p > (void *)test_heap); + assert(p + 1 <= (void *)test_heap + TEST_HEAP_SIZE); + mem_free(&skiboot_heap, p, "freed"); + assert(mem_check(&skiboot_heap)); + } + mem_free(&skiboot_heap, ptrs[0], "small freed"); + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + + /* Many little allocations, freed in reverse order. 
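+ * (each free then coalesces with the free space just after it, which exercises the prev_free bookkeeping checked explicitly in the next test)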
*/ + for (i = 0; i < 100; i++) { + ptrs[i] = mem_alloc(&skiboot_heap, sizeof(long), 1, "here"); + assert(ptrs[i]); + assert(ptrs[i] > (void *)test_heap); + assert(ptrs[i] + sizeof(long) + <= (void *)test_heap + TEST_HEAP_SIZE); + assert(mem_check(&skiboot_heap)); + } + for (i = 0; i < 100; i++) + mem_free(&skiboot_heap, ptrs[100 - 1 - i], "freed"); + + assert(heap_empty()); + assert(mem_check(&skiboot_heap)); + + /* Check the prev_free gets updated properly. */ + ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0]"); + ptrs[1] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[1]"); + assert(ptrs[1] > ptrs[0]); + mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free"); + assert(mem_check(&skiboot_heap)); + ptrs[0] = mem_alloc(&skiboot_heap, sizeof(long), 1, "ptrs[0] again"); + assert(mem_check(&skiboot_heap)); + mem_free(&skiboot_heap, ptrs[1], "ptrs[1] free"); + mem_free(&skiboot_heap, ptrs[0], "ptrs[0] free"); + assert(mem_check(&skiboot_heap)); + assert(heap_empty()); + +#if 0 + printf("Heap map:\n"); + for (i = 0; i < TEST_HEAP_SIZE / sizeof(long); i++) { + printf("%u", test_bit(skiboot_heap.bitmap, i)); + if (i % 64 == 63) + printf("\n"); + else if (i % 8 == 7) + printf(" "); + } +#endif + + /* Simple enlargement, then free */ + p = mem_alloc(&skiboot_heap, 1, 1, "one byte"); + assert(p); + assert(mem_resize(&skiboot_heap, p, 100, "hundred bytes")); + assert(mem_size(&skiboot_heap, p) >= 100); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "hundred bytes")); + mem_free(&skiboot_heap, p, "freed"); + + /* Simple shrink, then free */ + p = mem_alloc(&skiboot_heap, 100, 1, "100 bytes"); + assert(p); + assert(mem_resize(&skiboot_heap, p, 1, "1 byte")); + assert(mem_size(&skiboot_heap, p) < 100); + assert(mem_check(&skiboot_heap)); + assert(!strcmp(((struct alloc_hdr *)p)[-1].location, "1 byte")); + mem_free(&skiboot_heap, p, "freed"); + + /* Lots of resizing (enlarge). */ + p = mem_alloc(&skiboot_heap, 1, 1, "one byte"); + assert(p); + for (i = 1; i <= TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i++) { + assert(mem_resize(&skiboot_heap, p, i, "enlarge")); + assert(mem_size(&skiboot_heap, p) >= i); + assert(mem_check(&skiboot_heap)); + } + + /* Can't make it larger though. */ + assert(!mem_resize(&skiboot_heap, p, i, "enlarge")); + + for (i = TEST_HEAP_SIZE - sizeof(struct alloc_hdr); i > 0; i--) { + assert(mem_resize(&skiboot_heap, p, i, "shrink")); + assert(mem_check(&skiboot_heap)); + } + + mem_free(&skiboot_heap, p, "freed"); + assert(mem_check(&skiboot_heap)); + + /* Test splitting of a region. */ + r = new_region("base", (unsigned long)test_heap, + TEST_HEAP_SIZE, NULL, REGION_SKIBOOT_HEAP); + assert(add_region(r)); + r = new_region("splitter", (unsigned long)test_heap + TEST_HEAP_SIZE/4, + TEST_HEAP_SIZE/2, NULL, REGION_RESERVED); + assert(add_region(r)); + /* Now we should have *three* regions. 
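+ * Expected layout in address order, with the splitter carved out of the middle of base: [ base 1/4 ][ splitter 1/2 ][ base 1/4 ].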
*/ + i = 0; + list_for_each(&regions, r, list) { + if (region_start(r) == test_heap) { + assert(r->len == TEST_HEAP_SIZE/4); + assert(strcmp(r->name, "base") == 0); + assert(r->type == REGION_SKIBOOT_HEAP); + } else if (region_start(r) == test_heap + TEST_HEAP_SIZE / 4) { + assert(r->len == TEST_HEAP_SIZE/2); + assert(strcmp(r->name, "splitter") == 0); + assert(r->type == REGION_RESERVED); + assert(!r->free_list.n.next); + } else if (region_start(r) == test_heap + TEST_HEAP_SIZE/4*3) { + assert(r->len == TEST_HEAP_SIZE/4); + assert(strcmp(r->name, "base") == 0); + assert(r->type == REGION_SKIBOOT_HEAP); + } else + abort(); + assert(mem_check(r)); + i++; + } + assert(i == 3); + while ((r = list_pop(&regions, struct mem_region, list)) != NULL) { + list_del(&r->list); + mem_free(&skiboot_heap, r, __location__); + } + assert(mem_region_lock.lock_val == 0); + __free(test_heap, ""); + return 0; +} diff --git a/core/test/run-mem_region_init.c b/core/test/run-mem_region_init.c new file mode 100644 index 0000000..a24cc7b --- /dev/null +++ b/core/test/run-mem_region_init.c @@ -0,0 +1,179 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) +/* Don't include this, it's PPC-specific */ +#define __CPU_H +static unsigned int cpu_max_pir = 1; +struct cpu_thread { + unsigned int chip_id; +}; + +#include <stdlib.h> + +/* Use these before we undefine them below. */ +static inline void *real_malloc(size_t size) +{ + return malloc(size); +} + +static inline void real_free(void *p) +{ + return free(p); +} + +#include "../malloc.c" + +#include <skiboot.h> +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" + +/* But we need device tree to make copies of names. */ +#undef is_rodata +#define is_rodata(p) false + +static inline char *skiboot_strdup(const char *str) +{ + char *ret = __malloc(strlen(str) + 1, ""); + return memcpy(ret, str, strlen(str) + 1); +} +#undef strdup +#define strdup skiboot_strdup + +#include "../device.c" + +#include <skiboot.h> + +#include <assert.h> +#include <stdio.h> + +void lock(struct lock *l) +{ + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +/* We actually need a lot of room for the bitmaps!
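+ * (mem_region tracks one bit per long of heap, so with 64-bit longs this 1 << 27 byte test heap needs about (1 << 27) / 8 / 8 = 2MB of bitmap)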
*/ +#define TEST_HEAP_ORDER 27 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char name[sizeof("memory@") + STR_MAX_CHARS(reg[0])]; + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (unsigned long long)start); + + mem = dt_new(dt_root, name); + assert(mem); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +int main(void) +{ + uint64_t end; + int builtins; + struct mem_region *r; + char *heap = real_malloc(TEST_HEAP_SIZE); + + /* Use malloc for the heap, so valgrind can find issues. */ + skiboot_heap.start = (unsigned long)heap; + skiboot_heap.len = TEST_HEAP_SIZE; + skiboot_os_reserve.len = (unsigned long)heap; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + /* Make sure we overlap the heap, at least. */ + add_mem_node(0, 0x100000000ULL); + add_mem_node(0x100000000ULL, 0x100000000ULL); + end = 0x200000000ULL; + + /* Now convert. */ + mem_region_init(); + assert(mem_check(&skiboot_heap)); + + builtins = 0; + list_for_each(&regions, r, list) { + /* Regions must not overlap. */ + struct mem_region *r2, *pre = NULL, *post = NULL; + list_for_each(&regions, r2, list) { + if (r == r2) + continue; + assert(!overlaps(r, r2)); + } + + /* But should have exact neighbours. */ + list_for_each(&regions, r2, list) { + if (r == r2) + continue; + if (r2->start == r->start + r->len) + post = r2; + if (r2->start + r2->len == r->start) + pre = r2; + } + assert(r->start == 0 || pre); + assert(r->start + r->len == end || post); + + if (r == &skiboot_code_and_text || + r == &skiboot_heap || + r == &skiboot_after_heap || + r == &skiboot_cpu_stacks || + r == &skiboot_os_reserve) + builtins++; + else + assert(r->type == REGION_SKIBOOT_HEAP); + assert(mem_check(r)); + } + assert(builtins == 5); + + dt_free(dt_root); + + while ((r = list_pop(&regions, struct mem_region, list)) != NULL) { + list_del(&r->list); + if (r != &skiboot_code_and_text && + r != &skiboot_heap && + r != &skiboot_after_heap && + r != &skiboot_os_reserve && + r != &skiboot_cpu_stacks) { + free(r); + } + assert(mem_check(&skiboot_heap)); + } + assert(mem_region_lock.lock_val == 0); + real_free(heap); + return 0; +} diff --git a/core/test/run-mem_region_release_unused.c b/core/test/run-mem_region_release_unused.c new file mode 100644 index 0000000..e73cf25 --- /dev/null +++ b/core/test/run-mem_region_release_unused.c @@ -0,0 +1,177 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) +/* Don't include this, it's PPC-specific */ +#define __CPU_H +static unsigned int cpu_max_pir = 1; +struct cpu_thread { + unsigned int chip_id; +}; + +#include <stdlib.h> + +static void *__malloc(size_t size, const char *location __attribute__((unused))) +{ + return malloc(size); +} + +static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused))) +{ + return realloc(ptr, size); +} + +static void *__zalloc(size_t size, const char *location __attribute__((unused))) +{ + return calloc(size, 1); +} + +static inline void __free(void *p, const char *location __attribute__((unused))) +{ + return free(p); +} + +#include <skiboot.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" + +/* But we need device tree to make copies of names. */ +#undef is_rodata +#define is_rodata(p) false + +#include "../device.c" +#include <assert.h> +#include <stdio.h> + +void lock(struct lock *l) +{ + l->lock_val++; +} + +void unlock(struct lock *l) +{ + l->lock_val--; +} + +#define TEST_HEAP_ORDER 12 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char name[sizeof("memory@") + STR_MAX_CHARS(reg[0])]; + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (long long)start); + + mem = dt_new(dt_root, name); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +int main(void) +{ + uint64_t i; + struct mem_region *r, *other = NULL; + void *other_mem; + const char *last; + + /* Use malloc for the heap, so valgrind can find issues. */ + skiboot_heap.start = (unsigned long)malloc(TEST_HEAP_SIZE); + skiboot_heap.len = TEST_HEAP_SIZE; + skiboot_os_reserve.len = skiboot_heap.start; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + other_mem = malloc(1024*1024); + add_mem_node((unsigned long)other_mem, 1024*1024); + + /* Now convert. */ + mem_region_init(); + + /* Find our node to allocate from */ + list_for_each(&regions, r, list) { + if (region_start(r) == other_mem) + other = r; + } + /* This could happen if skiboot addresses clashed with our alloc. */ + assert(other); + assert(mem_check(other)); + + /* Allocate 1k from other region. */ + mem_alloc(other, 1024, 1, "1k"); + mem_region_release_unused(); + + assert(mem_check(&skiboot_heap)); + + /* Now we expect it to be split.
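+ * The 1k allocation keeps the head of the node's region in use as REGION_SKIBOOT_HEAP, while the untouched remainder is handed back as a new REGION_OS region, so the walk below counts 7 regions: the 5 built-ins plus these two halves.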
*/ + i = 0; + list_for_each(&regions, r, list) { + assert(mem_check(r)); + i++; + if (r == &skiboot_os_reserve) + continue; + if (r == &skiboot_code_and_text) + continue; + if (r == &skiboot_heap) + continue; + if (r == &skiboot_after_heap) + continue; + if (r == &skiboot_cpu_stacks) + continue; + if (r == other) { + assert(r->type == REGION_SKIBOOT_HEAP); + assert(r->len < 1024 * 1024); + } else { + assert(r->type == REGION_OS); + assert(r->start == other->start + other->len); + assert(r->start + r->len == other->start + 1024*1024); + } + } + assert(i == 7); + + last = NULL; + list_for_each(&regions, r, list) { + if (last != r->name && + strncmp(r->name, NODE_REGION_PREFIX, + strlen(NODE_REGION_PREFIX)) == 0) { + /* It's safe to cast away const as this is + * only going to happen in test code */ + free((void*)r->name); + break; + } + last = r->name; + } + + dt_free(dt_root); + free((void *)(long)skiboot_heap.start); + free(other_mem); + return 0; +} diff --git a/core/test/run-mem_region_release_unused_noalloc.c b/core/test/run-mem_region_release_unused_noalloc.c new file mode 100644 index 0000000..818e272 --- /dev/null +++ b/core/test/run-mem_region_release_unused_noalloc.c @@ -0,0 +1,159 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> + +#define BITS_PER_LONG (sizeof(long) * 8) +/* Don't include this, it's PPC-specific */ +#define __CPU_H +static unsigned int cpu_max_pir = 1; +struct cpu_thread { + unsigned int chip_id; +}; + +#include <stdlib.h> + +static void *__malloc(size_t size, const char *location __attribute__((unused))) +{ + return malloc(size); +} + +static void *__realloc(void *ptr, size_t size, const char *location __attribute__((unused))) +{ + return realloc(ptr, size); +} + +static void *__zalloc(size_t size, const char *location __attribute__((unused))) +{ + return calloc(size, 1); +} + +static inline void __free(void *p, const char *location __attribute__((unused))) +{ + return free(p); +} + +#include <skiboot.h> + +/* We need mem_region to accept __location__ */ +#define is_rodata(p) true +#include "../mem_region.c" + +/* But we need device tree to make copies of names.
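+ * (with is_rodata() forced false, dt_new() copies every node name, which is why the cleanup at the end must free() the NODE_REGION_PREFIX names)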
*/ +#undef is_rodata +#define is_rodata(p) false + +#include "../device.c" +#include <assert.h> +#include <stdio.h> + +void lock(struct lock *l) +{ + l->lock_val++; +} + +void unlock(struct lock *l) +{ + l->lock_val--; +} + +#define TEST_HEAP_ORDER 12 +#define TEST_HEAP_SIZE (1ULL << TEST_HEAP_ORDER) + +static void add_mem_node(uint64_t start, uint64_t len) +{ + struct dt_node *mem; + u64 reg[2]; + char name[sizeof("memory@") + STR_MAX_CHARS(reg[0])]; + + /* reg contains start and length */ + reg[0] = cpu_to_be64(start); + reg[1] = cpu_to_be64(len); + + sprintf(name, "memory@%llx", (long long)start); + + mem = dt_new(dt_root, name); + dt_add_property_string(mem, "device_type", "memory"); + dt_add_property(mem, "reg", reg, sizeof(reg)); +} + +void add_chip_dev_associativity(struct dt_node *dev __attribute__((unused))) +{ +} + +int main(void) +{ + uint64_t i; + struct mem_region *r; + const char *last; + + /* Use malloc for the heap, so valgrind can find issues. */ + skiboot_heap.start = (unsigned long)malloc(TEST_HEAP_SIZE); + skiboot_heap.len = TEST_HEAP_SIZE; + skiboot_os_reserve.len = skiboot_heap.start; + + dt_root = dt_new_root(""); + dt_add_property_cells(dt_root, "#address-cells", 2); + dt_add_property_cells(dt_root, "#size-cells", 2); + + add_mem_node(0, 0x100000000ULL); + add_mem_node(0x100000000ULL, 0x100000000ULL); + + mem_region_init(); + + mem_region_release_unused(); + + assert(mem_check(&skiboot_heap)); + + /* Now we expect it to be split. */ + i = 0; + list_for_each(&regions, r, list) { + assert(mem_check(r)); + i++; + if (r == &skiboot_os_reserve) + continue; + if (r == &skiboot_code_and_text) + continue; + if (r == &skiboot_heap) + continue; + if (r == &skiboot_after_heap) + continue; + if (r == &skiboot_cpu_stacks) + continue; + + /* the memory nodes should all be available to the OS now */ + assert(r->type == REGION_OS); + } + assert(i == 9); + + last = NULL; + list_for_each(&regions, r, list) { + if (last != r->name && + strncmp(r->name, NODE_REGION_PREFIX, + strlen(NODE_REGION_PREFIX)) == 0) { + /* It's safe to cast away the const as + * this never happens at runtime, + * only in test and only for valgrind + */ + free((void*)r->name); + } + last = r->name; + } + + dt_free(dt_root); + free((void *)(long)skiboot_heap.start); + return 0; +} diff --git a/core/test/run-msg.c b/core/test/run-msg.c new file mode 100644 index 0000000..cd36408 --- /dev/null +++ b/core/test/run-msg.c @@ -0,0 +1,256 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +#include <skiboot.h> +#include <inttypes.h> +#include <assert.h> + +static bool zalloc_should_fail = false; +static void *zalloc(size_t size) +{ + if (zalloc_should_fail) { + errno = ENOMEM; + return NULL; + } + + return calloc(size, 1); +} + +#include "../opal-msg.c" + +void lock(struct lock *l) +{ + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +void opal_update_pending_evt(uint64_t evt_mask, uint64_t evt_values) +{ + (void)evt_mask; + (void)evt_values; +} + +static long magic = 8097883813087437089UL; +static void callback(void *data) +{ + assert(*(uint64_t *)data == magic); +} + +static size_t list_count(struct list_head *list) +{ + size_t count = 0; + struct opal_msg_entry *dummy; + + list_for_each(list, dummy, link) + count++; + return count; +} + +int main(void) +{ + struct opal_msg_entry* entry; + int free_size = OPAL_MAX_MSGS; + int nfree = free_size; + int npending = 0; + int r; + static struct opal_msg m; + uint64_t *m_ptr = (uint64_t *)&m; + + opal_init_msg(); + + assert(list_count(&msg_pending_list) == npending); + assert(list_count(&msg_free_list) == nfree); + + /* Callback. */ + r = opal_queue_msg(0, &magic, callback, (u64)0, (u64)1, (u64)2); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + assert(m.params[0] == 0); + assert(m.params[1] == 1); + assert(m.params[2] == 2); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == ++nfree); + + /* No params. */ + r = opal_queue_msg(0, NULL, NULL); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == ++nfree); + + /* > 8 params (ARRAY_SIZE(entry->msg.params) */ + r = opal_queue_msg(0, NULL, NULL, 0, 1, 2, 3, 4, 5, 6, 7, 0xBADDA7A); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == ++nfree); + + assert(m.params[0] == 0); + assert(m.params[1] == 1); + assert(m.params[2] == 2); + assert(m.params[3] == 3); + assert(m.params[4] == 4); + assert(m.params[5] == 5); + assert(m.params[6] == 6); + assert(m.params[7] == 7); + + /* 8 params (ARRAY_SIZE(entry->msg.params) */ + r = opal_queue_msg(0, NULL, NULL, 0, 10, 20, 30, 40, 50, 60, 70); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + assert(list_count(&msg_pending_list) == --npending); + assert(list_count(&msg_free_list) == ++nfree); + + assert(m.params[0] == 0); + assert(m.params[1] == 10); + assert(m.params[2] == 20); + assert(m.params[3] == 30); + assert(m.params[4] == 40); + assert(m.params[5] == 50); + assert(m.params[6] == 60); + assert(m.params[7] == 70); + + /* Full list (no free nodes in pending). 
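+ * Once the free list is drained, opal_queue_msg() falls back to allocating a fresh entry, so the pending count may exceed OPAL_MAX_MSGS; only when that allocation fails (forced further below) does it return OPAL_RESOURCE.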
*/ + while (nfree > 0) { + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == 0); + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + } + assert(list_count(&msg_free_list) == 0); + assert(nfree == 0); + assert(npending == OPAL_MAX_MSGS); + + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == 0); + + assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1); + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == nfree); + + /* Make zalloc fail to test error handling. */ + zalloc_should_fail = true; + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == OPAL_RESOURCE); + + assert(list_count(&msg_pending_list) == OPAL_MAX_MSGS+1); + assert(list_count(&msg_pending_list) == npending); + assert(list_count(&msg_free_list) == nfree); + + /* Empty list (no nodes). */ + while(!list_empty(&msg_pending_list)) { + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + npending--; + nfree++; + } + assert(list_count(&msg_pending_list) == npending); + assert(list_count(&msg_free_list) == nfree); + assert(npending == 0); + assert(nfree == OPAL_MAX_MSGS+1); + + r = opal_queue_msg(OPAL_MSG_ASYNC_COMP, NULL, NULL); + assert(r == 0); + + assert(list_count(&msg_pending_list) == ++npending); + assert(list_count(&msg_free_list) == --nfree); + + /* Request invalid size. */ + r = opal_get_msg(m_ptr, sizeof(m) - 1); + assert(r == OPAL_PARAMETER); + + /* Pass null buffer. */ + r = opal_get_msg(NULL, sizeof(m)); + assert(r == OPAL_PARAMETER); + + /* Get msg when none are pending. */ + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == 0); + + r = opal_get_msg(m_ptr, sizeof(m)); + assert(r == OPAL_RESOURCE); + +#define test_queue_num(type, val) \ + r = opal_queue_msg(0, NULL, NULL, \ + (type)val, (type)val, (type)val, (type)val, \ + (type)val, (type)val, (type)val, (type)val); \ + assert(r == 0); \ + r = opal_get_msg(m_ptr, sizeof(m)); \ + assert(r == OPAL_SUCCESS); \ + assert(m.params[0] == (type)val); \ + assert(m.params[1] == (type)val); \ + assert(m.params[2] == (type)val); \ + assert(m.params[3] == (type)val); \ + assert(m.params[4] == (type)val); \ + assert(m.params[5] == (type)val); \ + assert(m.params[6] == (type)val); \ + assert(m.params[7] == (type)val) + + /* Test types of various widths */ + test_queue_num(u64, -1); + test_queue_num(s64, -1); + test_queue_num(u32, -1); + test_queue_num(s32, -1); + test_queue_num(u16, -1); + test_queue_num(s16, -1); + test_queue_num(u8, -1); + test_queue_num(s8, -1); + + /* Clean up the list to keep valgrind happy. */ + while(!list_empty(&msg_free_list)) { + entry = list_pop(&msg_free_list, struct opal_msg_entry, link); + assert(entry); + free(entry); + } + + while(!list_empty(&msg_pending_list)) { + entry = list_pop(&msg_pending_list, struct opal_msg_entry, link); + assert(entry); + free(entry); + } + + return 0; +} diff --git a/core/test/run-trace.c b/core/test/run-trace.c new file mode 100644 index 0000000..7dabebd --- /dev/null +++ b/core/test/run-trace.c @@ -0,0 +1,386 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <config.h> +#include <stdlib.h> +#include <assert.h> +#include <sched.h> +#include <stdlib.h> +#include <stdint.h> +#include <unistd.h> +#include <stdio.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/wait.h> + +/* Don't include these: PPC-specific */ +#define __CPU_H +#define __TIME_H +#define __PROCESSOR_H + +#if defined(__i386__) || defined(__x86_64__) +/* This is more than a lwsync, but it'll work */ +static void full_barrier(void) +{ + asm volatile("mfence" : : : "memory"); +} +#define lwsync full_barrier +#define sync full_barrier +#else +#error "Define sync & lwsync for this arch" +#endif + +#define zalloc(size) calloc((size), 1) + +struct cpu_thread { + uint32_t pir; + uint32_t chip_id; + struct trace_info *trace; + int server_no; + bool is_secondary; + struct cpu_thread *primary; +}; +static struct cpu_thread *this_cpu(void); + +#define CPUS 4 + +static struct cpu_thread fake_cpus[CPUS]; + +static inline struct cpu_thread *next_cpu(struct cpu_thread *cpu) +{ + if (cpu == NULL) + return &fake_cpus[0]; + cpu++; + if (cpu == &fake_cpus[CPUS]) + return NULL; + return cpu; +} + +#define first_cpu() next_cpu(NULL) + +#define for_each_cpu(cpu) \ + for (cpu = first_cpu(); cpu; cpu = next_cpu(cpu)) + +static unsigned long timestamp; +static unsigned long mftb(void) +{ + return timestamp; +} + +static void *local_alloc(unsigned int chip_id, + size_t size, size_t align) +{ + void *p; + + (void)chip_id; + if (posix_memalign(&p, align, size)) + p = NULL; + return p; +} + +struct dt_node; +extern struct dt_node *opal_node; + +#include "../trace.c" + +#define rmb() lwsync() + +#include "../external/trace.c" +#include "../device.c" + +char __rodata_start[1], __rodata_end[1]; +struct dt_node *opal_node; +struct debug_descriptor debug_descriptor = { + .trace_mask = -1 +}; + +void lock(struct lock *l) +{ + assert(!l->lock_val); + l->lock_val = 1; +} + +void unlock(struct lock *l) +{ + assert(l->lock_val); + l->lock_val = 0; +} + +struct cpu_thread *my_fake_cpu; +static struct cpu_thread *this_cpu(void) +{ + return my_fake_cpu; +} + +#include <sys/mman.h> +#define PER_CHILD_TRACES (1024*1024) + +static void write_trace_entries(int id) +{ + void exit(int); + unsigned int i; + union trace trace; + + timestamp = id; + for (i = 0; i < PER_CHILD_TRACES; i++) { + timestamp = i * CPUS + id; + assert(sizeof(trace.hdr) % 8 == 0); + /* First child never repeats, second repeats once, etc. */ + trace_add(&trace, 3 + ((i / (id + 1)) % 0x40), + sizeof(trace.hdr)); + } + + /* Final entry has special type, so parent knows it's over. 
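+ * (0x70 lies outside the 3..0x42 range generated above, so it cannot be confused with a regular record)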
*/ + trace_add(&trace, 0x70, sizeof(trace.hdr)); + exit(0); +} + +static bool all_done(const bool done[]) +{ + unsigned int i; + + for (i = 0; i < CPUS; i++) + if (!done[i]) + return false; + return true; +} + +static void test_parallel(void) +{ + void *p; + unsigned int i, counts[CPUS] = { 0 }, overflows[CPUS] = { 0 }; + unsigned int repeats[CPUS] = { 0 }, num_overflows[CPUS] = { 0 }; + bool done[CPUS] = { false }; + size_t len = sizeof(struct trace_info) + TBUF_SZ + sizeof(union trace); + int last = 0; + + /* Use a shared mmap to test actual parallel buffers. */ + i = (CPUS*len + getpagesize()-1)&~(getpagesize()-1); + p = mmap(NULL, i, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_SHARED, -1, 0); + + for (i = 0; i < CPUS; i++) { + fake_cpus[i].trace = p + i * len; + fake_cpus[i].trace->tb.mask = TBUF_SZ - 1; + fake_cpus[i].trace->tb.max_size = sizeof(union trace); + fake_cpus[i].is_secondary = false; + } + + for (i = 0; i < CPUS; i++) { + if (!fork()) { + /* Child. */ + my_fake_cpu = &fake_cpus[i]; + write_trace_entries(i); + } + } + + while (!all_done(done)) { + union trace t; + + for (i = 0; i < CPUS; i++) { + if (trace_get(&t, &fake_cpus[(i+last) % CPUS].trace->tb)) + break; + } + + if (i == CPUS) { + sched_yield(); + continue; + } + i = (i + last) % CPUS; + last = i; + + assert(t.hdr.cpu < CPUS); + assert(!done[t.hdr.cpu]); + + if (t.hdr.type == TRACE_OVERFLOW) { + /* Conveniently, each record is 16 bytes here. */ + assert(t.overflow.bytes_missed % 16 == 0); + overflows[i] += t.overflow.bytes_missed / 16; + num_overflows[i]++; + continue; + } + + assert(t.hdr.timestamp % CPUS == t.hdr.cpu); + if (t.hdr.type == TRACE_REPEAT) { + assert(t.hdr.len_div_8 * 8 == sizeof(t.repeat)); + assert(t.repeat.num != 0); + assert(t.repeat.num <= t.hdr.cpu); + repeats[t.hdr.cpu] += t.repeat.num; + } else if (t.hdr.type == 0x70) { + done[t.hdr.cpu] = true; + } else { + counts[t.hdr.cpu]++; + } + } + + /* Gather children. */ + for (i = 0; i < CPUS; i++) { + int status; + wait(&status); + } + + for (i = 0; i < CPUS; i++) { + printf("Child %i: %u produced, %u overflows, %llu total\n", i, + counts[i], overflows[i], + (long long)fake_cpus[i].trace->tb.end); + assert(counts[i] + repeats[i] <= PER_CHILD_TRACES); + } + /* Child 0 never repeats. */ + assert(repeats[0] == 0); + assert(counts[0] + overflows[0] == PER_CHILD_TRACES); + + /* + * FIXME: Other children have some fuzz, since overflows may + * include repeat record we already read. And odd-numbered + * overflows may include more repeat records than normal + * records (they alternate). + */ +} + +int main(void) +{ + union trace minimal; + union trace large; + union trace trace; + unsigned int i, j; + + opal_node = dt_new_root("opal"); + for (i = 0; i < CPUS; i++) { + fake_cpus[i].server_no = i; + fake_cpus[i].is_secondary = (i & 0x1); + fake_cpus[i].primary = &fake_cpus[i & ~0x1]; + } + init_trace_buffers(); + my_fake_cpu = &fake_cpus[0]; + + for (i = 0; i < CPUS; i++) { + assert(trace_empty(&fake_cpus[i].trace->tb)); + assert(!trace_get(&trace, &fake_cpus[i].trace->tb)); + } + + assert(sizeof(trace.hdr) % 8 == 0); + timestamp = 1; + trace_add(&minimal, 100, sizeof(trace.hdr)); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.timestamp == timestamp); + + /* Make it wrap once. 
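+ * Writing TBUF_SZ / record_size + 1 records overflows the buffer by exactly one record, so the oldest entry is discarded and the first read below must see a TRACE_OVERFLOW marker reporting one record's worth of missed bytes.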
*/ + for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8) + 1; i++) { + timestamp = i; + trace_add(&minimal, 99 + (i%2), sizeof(trace.hdr)); + } + + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + /* First one must be overflow marker. */ + assert(trace.hdr.type == TRACE_OVERFLOW); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.overflow)); + assert(trace.overflow.bytes_missed == minimal.hdr.len_div_8 * 8); + + for (i = 0; i < TBUF_SZ / (minimal.hdr.len_div_8 * 8); i++) { + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.timestamp == i+1); + assert(trace.hdr.type == 99 + ((i+1)%2)); + } + assert(!trace_get(&trace, &my_fake_cpu->trace->tb)); + + /* Now put in some weird-length ones, to test overlap. + * Last power of 2, minus 8. */ + for (j = 0; (1 << j) < sizeof(large); j++); + for (i = 0; i < TBUF_SZ; i++) { + timestamp = i; + trace_add(&large, 100 + (i%2), (1 << (j-1))); + } + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.type == TRACE_OVERFLOW); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.len_div_8 == large.hdr.len_div_8); + i = trace.hdr.timestamp; + while (trace_get(&trace, &my_fake_cpu->trace->tb)) + assert(trace.hdr.timestamp == ++i); + + /* Test repeats. */ + for (i = 0; i < 65538; i++) { + timestamp = i; + trace_add(&minimal, 100, sizeof(trace.hdr)); + } + timestamp = i; + trace_add(&minimal, 101, sizeof(trace.hdr)); + timestamp = i+1; + trace_add(&minimal, 101, sizeof(trace.hdr)); + + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.timestamp == 0); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 100); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(trace.repeat.num == 65535); + assert(trace.repeat.timestamp == 65535); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.timestamp == 65536); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 100); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(trace.repeat.num == 1); + assert(trace.repeat.timestamp == 65537); + + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.timestamp == 65538); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 101); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(trace.repeat.num == 1); + assert(trace.repeat.timestamp == 65539); + + /* Now, test adding repeat while we're reading... 
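+ * The repeat count is a 16-bit field, so after 65535 consecutive repeats a full record must be emitted again; that is the i % 65536 == 0 case asserted below.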
*/ + timestamp = 0; + trace_add(&minimal, 100, sizeof(trace.hdr)); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + assert(trace.hdr.timestamp == 0); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + assert(trace.hdr.type == 100); + + for (i = 1; i < TBUF_SZ; i++) { + timestamp = i; + trace_add(&minimal, 100, sizeof(trace.hdr)); + assert(trace_get(&trace, &my_fake_cpu->trace->tb)); + if (i % 65536 == 0) { + assert(trace.hdr.type == 100); + assert(trace.hdr.len_div_8 == minimal.hdr.len_div_8); + } else { + assert(trace.hdr.type == TRACE_REPEAT); + assert(trace.hdr.len_div_8 * 8 == sizeof(trace.repeat)); + assert(trace.repeat.num == 1); + } + assert(trace.repeat.timestamp == i); + assert(!trace_get(&trace, &my_fake_cpu->trace->tb)); + } + + for (i = 0; i < CPUS; i++) + if (!fake_cpus[i].is_secondary) + free(fake_cpus[i].trace); + + test_parallel(); + + return 0; +} diff --git a/core/test/stubs.c b/core/test/stubs.c new file mode 100644 index 0000000..3233455 --- /dev/null +++ b/core/test/stubs.c @@ -0,0 +1,43 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Add any stub functions required for linking here. */ +#include <stdlib.h> + +static void stub_function(void) +{ + abort(); +} + +#define STUB(fnname) \ + void fnname(void) __attribute__((weak, alias ("stub_function"))) + +STUB(fdt_begin_node); +STUB(fdt_property); +STUB(fdt_end_node); +STUB(fdt_create); +STUB(fdt_add_reservemap_entry); +STUB(fdt_finish_reservemap); +STUB(fdt_strerror); +STUB(fdt_check_header); +STUB(_fdt_check_node_offset); +STUB(fdt_next_tag); +STUB(fdt_string); +STUB(fdt_get_name); +STUB(dt_first); +STUB(dt_next); +STUB(dt_has_node_property); +STUB(dt_get_address); +STUB(add_chip_dev_associativity); diff --git a/core/timebase.c b/core/timebase.c new file mode 100644 index 0000000..d51e96b --- /dev/null +++ b/core/timebase.c @@ -0,0 +1,67 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <timebase.h> +#include <fsp.h> + +void time_wait(unsigned long duration) +{ + unsigned long end = mftb() + duration; + + while(tb_compare(mftb(), end) != TB_AAFTERB) + fsp_poll(); +} + +void time_wait_ms(unsigned long ms) +{ + time_wait(msecs_to_tb(ms)); +} + +void time_wait_us(unsigned long us) +{ + time_wait(usecs_to_tb(us)); +} + +unsigned long timespec_to_tb(const struct timespec *ts) +{ + unsigned long ns; + + /* First convert to ns */ + ns = ts->tv_sec * 1000000000ul; + ns += ts->tv_nsec; + + /* + * This is a very rough approximation, it works provided + * we never try to pass too long delays here and the TB + * frequency isn't significantly lower than 512Mhz. + * + * We could improve the precision by shifting less bits + * at the expense of capacity or do 128 bit math which + * I'm not eager to do :-) + */ + return (ns * (tb_hz >> 24)) / (1000000000ul >> 24); +} + +int nanosleep(const struct timespec *req, struct timespec *rem) +{ + time_wait(timespec_to_tb(req)); + + if (rem) { + rem->tv_sec = 0; + rem->tv_nsec = 0; + } + return 0; +} diff --git a/core/trace.c b/core/trace.c new file mode 100644 index 0000000..76f3c30 --- /dev/null +++ b/core/trace.c @@ -0,0 +1,244 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <trace.h> +#include <timebase.h> +#include <lock.h> +#include <string.h> +#include <stdlib.h> +#include <cpu.h> +#include <device.h> +#include <libfdt.h> +#include <processor.h> +#include <skiboot.h> + +#define DEBUG_TRACES + +#define MAX_SIZE (sizeof(union trace) + 7) + +/* Smaller trace buffer for early booting */ +#define BOOT_TBUF_SZ 65536 +static struct { + struct trace_info trace_info; + char buf[BOOT_TBUF_SZ + MAX_SIZE]; +} boot_tracebuf __section(".data.boot_trace") = { + .trace_info = { + .lock = LOCK_UNLOCKED, + .tb = { + .mask = BOOT_TBUF_SZ - 1, + .max_size = MAX_SIZE + }, + }, + .buf = { 0 } +}; + +void init_boot_tracebuf(struct cpu_thread *boot_cpu) +{ + boot_cpu->trace = &boot_tracebuf.trace_info; +} + +static size_t tracebuf_extra(void) +{ + /* We make room for the largest possible record */ + return TBUF_SZ + MAX_SIZE; +} + +/* To avoid bloating each entry, repeats are actually specific entries. + * tb->last points to the last (non-repeat) entry. */ +static bool handle_repeat(struct tracebuf *tb, const union trace *trace) +{ + struct trace_hdr *prev; + struct trace_repeat *rpt; + u32 len; + + prev = (void *)tb->buf + (tb->last & tb->mask); + + if (prev->type != trace->hdr.type + || prev->len_div_8 != trace->hdr.len_div_8 + || prev->cpu != trace->hdr.cpu) + return false; + + len = prev->len_div_8 << 3; + if (memcmp(prev + 1, &trace->hdr + 1, len - sizeof(*prev)) != 0) + return false; + + /* If they've consumed prev entry, don't repeat. */ + if (tb->last < tb->start) + return false; + + /* OK, it's a duplicate. Do we already have repeat? */ + if (tb->last + len != tb->end) { + /* FIXME: Reader is not protected from seeing this! 
*/ + rpt = (void *)tb->buf + ((tb->last + len) & tb->mask); + assert(tb->last + len + rpt->len_div_8*8 == tb->end); + assert(rpt->type == TRACE_REPEAT); + + /* If this repeat entry is full, don't repeat. */ + if (rpt->num == 0xFFFF) + return false; + + rpt->num++; + rpt->timestamp = trace->hdr.timestamp; + return true; + } + + /* + * Generate repeat entry: it's the smallest possible entry, so we + * must have eliminated old entries. + */ + assert(trace->hdr.len_div_8 * 8 >= sizeof(*rpt)); + + rpt = (void *)tb->buf + (tb->end & tb->mask); + rpt->timestamp = trace->hdr.timestamp; + rpt->type = TRACE_REPEAT; + rpt->len_div_8 = sizeof(*rpt) >> 3; + rpt->cpu = trace->hdr.cpu; + rpt->prev_len = trace->hdr.len_div_8 << 3; + rpt->num = 1; + lwsync(); /* write barrier: complete repeat record before exposing */ + tb->end += sizeof(*rpt); + return true; +} + +void trace_add(union trace *trace, u8 type, u16 len) +{ + struct trace_info *ti = this_cpu()->trace; + unsigned int tsz; + + trace->hdr.type = type; + trace->hdr.len_div_8 = (len + 7) >> 3; + + tsz = trace->hdr.len_div_8 << 3; + +#ifdef DEBUG_TRACES + assert(tsz >= sizeof(trace->hdr)); + assert(tsz <= sizeof(*trace)); + assert(trace->hdr.type != TRACE_REPEAT); + assert(trace->hdr.type != TRACE_OVERFLOW); +#endif + /* Skip traces not enabled in the debug descriptor */ + if (!((1ul << trace->hdr.type) & debug_descriptor.trace_mask)) + return; + + trace->hdr.timestamp = mftb(); + trace->hdr.cpu = this_cpu()->server_no; + + lock(&ti->lock); + + /* Throw away old entries before we overwrite them. */ + while ((ti->tb.start + ti->tb.mask + 1) < (ti->tb.end + tsz)) { + struct trace_hdr *hdr; + + hdr = (void *)ti->tb.buf + (ti->tb.start & ti->tb.mask); + ti->tb.start += hdr->len_div_8 << 3; + } + + /* Must update ->start before we rewrite new entries. */ + lwsync(); /* write barrier */ + + /* Check for duplicates... */ + if (!handle_repeat(&ti->tb, trace)) { + /* This may go off end, and that's why ti->tb.buf is oversize */ + memcpy(ti->tb.buf + (ti->tb.end & ti->tb.mask), trace, tsz); + ti->tb.last = ti->tb.end; + lwsync(); /* write barrier: write entry before exposing */ + ti->tb.end += tsz; + } + unlock(&ti->lock); +} + +static void trace_add_dt_props(void) +{ + unsigned int i; + u64 *prop, tmask; + + prop = malloc(sizeof(u64) * 2 * debug_descriptor.num_traces); + + for (i = 0; i < debug_descriptor.num_traces; i++) { + prop[i * 2] = cpu_to_fdt64(debug_descriptor.trace_phys[i]); + prop[i * 2 + 1] = cpu_to_fdt64(debug_descriptor.trace_size[i]); + } + + dt_add_property(opal_node, "ibm,opal-traces", + prop, sizeof(u64) * 2 * i); + free(prop); + + tmask = (uint64_t)&debug_descriptor.trace_mask; + dt_add_property_cells(opal_node, "ibm,opal-trace-mask", + hi32(tmask), lo32(tmask)); +} + +static void trace_add_desc(struct trace_info *t, uint64_t size) +{ + unsigned int i = debug_descriptor.num_traces; + + if (i >= DEBUG_DESC_MAX_TRACES) { + prerror("TRACE: Debug descriptor trace list full !\n"); + return; + } + debug_descriptor.num_traces++; + + debug_descriptor.trace_phys[i] = (uint64_t)&t->tb; + debug_descriptor.trace_tce[i] = 0; /* populated later */ + debug_descriptor.trace_size[i] = size; +} + +/* Allocate trace buffers once we know memory topology */ +void init_trace_buffers(void) +{ + struct cpu_thread *t; + struct trace_info *any = &boot_tracebuf.trace_info; + uint64_t size; + + /* Boot the boot trace in the debug descriptor */ + trace_add_desc(any, sizeof(boot_tracebuf.buf)); + + /* Allocate a trace buffer for each primary cpu. 
*/ + for_each_cpu(t) { + if (t->is_secondary) + continue; + + /* Use a 4K alignment for TCE mapping */ + size = ALIGN_UP(sizeof(*t->trace) + tracebuf_extra(), 0x1000); + t->trace = local_alloc(t->chip_id, size, 0x1000); + if (t->trace) { + any = t->trace; + memset(t->trace, 0, size); + init_lock(&t->trace->lock); + t->trace->tb.mask = TBUF_SZ - 1; + t->trace->tb.max_size = MAX_SIZE; + trace_add_desc(any, sizeof(t->trace->tb) + + tracebuf_extra()); + } else + prerror("TRACE: cpu 0x%x allocation failed\n", t->pir); + } + + /* In case any allocations failed, share trace buffers. */ + for_each_cpu(t) { + if (!t->is_secondary && !t->trace) + t->trace = any; + } + + /* And copy those to the secondaries. */ + for_each_cpu(t) { + if (!t->is_secondary) + continue; + t->trace = t->primary->trace; + } + + /* Trace node in DT. */ + trace_add_dt_props(); +} diff --git a/core/utils.c b/core/utils.c new file mode 100644 index 0000000..2bc57b1 --- /dev/null +++ b/core/utils.c @@ -0,0 +1,59 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <lock.h> +#include <fsp.h> +#include <processor.h> + +void abort(void) +{ + static bool in_abort = false; + unsigned long hid0; + + if (in_abort) + for (;;) ; + in_abort = true; + + bust_locks = true; + + op_display(OP_FATAL, OP_MOD_CORE, 0x6666); + + fputs("Aborting!\n", stderr); + backtrace(); + + /* XXX FIXME: We should fsp_poll for a while to ensure any pending + * console writes have made it out, but until we have decent PSI + * link handling we must not do it forever. Polling can prevent the + * FSP from bringing the PSI link up and it can get stuck in a + * reboot loop. + */ + + hid0 = mfspr(SPR_HID0); + hid0 |= SPR_HID0_ENABLE_ATTN; + set_hid0(hid0); + trigger_attn(); + for (;;) ; +} + +char __attrconst tohex(uint8_t nibble) +{ + static const char __tohex[] = {'0','1','2','3','4','5','6','7','8','9', + 'A','B','C','D','E','F'}; + if (nibble > 0xf) + return '?'; + return __tohex[nibble]; +} diff --git a/core/vpd.c b/core/vpd.c new file mode 100644 index 0000000..deb552c --- /dev/null +++ b/core/vpd.c @@ -0,0 +1,211 @@ +/* Copyright 2013-2014 IBM Corp. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <skiboot.h> +#include <vpd.h> +#include <string.h> +#include <fsp.h> +#include <device.h> + +#define CHECK_SPACE(_p, _n, _e) (((_e) - (_p)) >= (_n)) + +/* Low level keyword search in a record. 
Can be used to find
+ * the next keyword of a given type, for example when a record
+ * holds multiple MF/SM keyword pairs.
+ */
+const void *vpd_find_keyword(const void *rec, size_t rec_sz,
+ const char *kw, uint8_t *kw_size)
+{
+ const uint8_t *p = rec, *end = rec + rec_sz;
+
+ while (CHECK_SPACE(p, 3, end)) {
+ uint8_t k1 = *(p++);
+ uint8_t k2 = *(p++);
+ uint8_t sz = *(p++);
+
+ if (k1 == kw[0] && k2 == kw[1]) {
+ if (kw_size)
+ *kw_size = sz;
+ return p;
+ }
+ p += sz;
+ }
+ return NULL;
+}
+
+/* Locate a record in a VPD blob
+ *
+ * Note: This works with VPD LIDs. It will scan until it finds
+ * the first 0x84, so it will skip the zero padding that VPD
+ * LIDs seem to contain.
+ */
+const void *vpd_find_record(const void *vpd, size_t vpd_size,
+ const char *record, size_t *sz)
+{
+ const uint8_t *p = vpd, *end = vpd + vpd_size;
+ bool first_start = true;
+ size_t rec_sz;
+ uint8_t namesz = 0;
+ const char *rec_name;
+
+ while (CHECK_SPACE(p, 4, end)) {
+ /* Get header byte */
+ if (*(p++) != 0x84) {
+ /* Skip the initial padding in VPD LIDs */
+ if (first_start)
+ continue;
+ break;
+ }
+ first_start = false;
+ rec_sz = *(p++);
+ rec_sz |= *(p++) << 8;
+ if (!CHECK_SPACE(p, rec_sz, end)) {
+ prerror("VPD: Malformed or truncated VPD,"
+ " record size doesn't fit\n");
+ return NULL;
+ }
+
+ /* Find record name */
+ rec_name = vpd_find_keyword(p, rec_sz, "RT", &namesz);
+ if (rec_name && strncmp(record, rec_name, namesz) == 0) {
+ *sz = rec_sz;
+ return p;
+ }
+
+ p += rec_sz;
+ if (*(p++) != 0x78) {
+ prerror("VPD: Malformed or truncated VPD,"
+ " missing final 0x78 in record %.4s\n",
+ rec_name ? rec_name : "????");
+ return NULL;
+ }
+ }
+ return NULL;
+}
+
+/* Locate a keyword in a record in a VPD blob
+ *
+ * Note: This works with VPD LIDs. It will scan until it finds
+ * the first 0x84, so it will skip the zero padding that VPD
+ * LIDs seem to contain.
+ */
+const void *vpd_find(const void *vpd, size_t vpd_size,
+ const char *record, const char *keyword,
+ uint8_t *sz)
+{
+ size_t rec_sz;
+ const uint8_t *p;
+
+ p = vpd_find_record(vpd, vpd_size, record, &rec_sz);
+ if (p)
+ p = vpd_find_keyword(p, rec_sz, keyword, sz);
+ return p;
+}
+
+/* Helper to load a VPD LID. Pass a pointer to the corresponding LX keyword */
+static void *vpd_lid_load(const uint8_t *lx, uint8_t lxrn, size_t *size)
+{
+ /* This is a guessing game, as we don't have the info from the
+ * pHyp folks. But basically, it seems to boil down to loading
+ * a LID whose name is 0x80e000yy, where yy is the last 2 digits
+ * of the LX record in hex.
+ *
+ * [ Correction: After a chat with some folks, it looks like it's
+ * actually 4 digits, though the lid number is limited to fff
+ * so we weren't far off. ]
+ *
+ * For safety, we look for a matching LX record in an LXRn
+ * (n = lxrn argument) or in VINI if lxrn=0xff
+ */
+ uint32_t lid_no = 0x80e00000 | ((lx[6] & 0xf) << 8) | lx[7];
+
+ /* We don't quite know how to get to the LID directory, so
+ * we don't know the size. Let's allocate 16K; all the VPD LIDs
+ * I've seen so far are much smaller.
+ */
+#define VPD_LID_MAX_SIZE 0x4000
+ void *data = malloc(VPD_LID_MAX_SIZE);
+ char record[4] = "LXR0";
+ const void *valid_lx;
+ uint8_t lx_size;
+ int rc;
+
+ if (!data) {
+ prerror("VPD: Failed to allocate memory for LID\n");
+ return NULL;
+ }
+
+ /* Adjust LID number for flash side */
+ lid_no = fsp_adjust_lid_side(lid_no);
+ printf("VPD: Trying to load VPD LID 0x%08x...\n", lid_no);
+
+ *size = VPD_LID_MAX_SIZE;
+
+ /* Load it from the FSP */
+ rc = fsp_fetch_data(0, FSP_DATASET_NONSP_LID, lid_no, 0, data, size);
+ if (rc) {
+ prerror("VPD: Error %d loading VPD LID\n", rc);
+ goto fail;
+ }
+
+ /* Validate it */
+ if (lxrn < 9)
+ record[3] = '0' + lxrn;
+ else
+ memcpy(record, "VINI", 4);
+
+ valid_lx = vpd_find(data, *size, record, "LX", &lx_size);
+ if (!valid_lx || lx_size != 8) {
+ prerror("VPD: Cannot find validation LX record\n");
+ goto fail;
+ }
+ if (memcmp(valid_lx, lx, 8) != 0) {
+ prerror("VPD: LX record mismatch!\n");
+ goto fail;
+ }
+
+ printf("VPD: Loaded %zu bytes\n", *size);
+
+ /* Got it! Trim the allocation down to the actual size. */
+ return realloc(data, *size);
+ fail:
+ free(data);
+ return NULL;
+}
+
+void vpd_iohub_load(struct dt_node *hub_node)
+{
+ void *vpd;
+ size_t sz;
+ const uint32_t *p;
+ unsigned int lx_idx;
+ const char *lxr;
+
+ p = dt_prop_get_def(hub_node, "ibm,vpd-lx-info", NULL);
+ if (!p)
+ return;
+
+ lx_idx = p[0];
+ lxr = (const char *)&p[1];
+
+ vpd = vpd_lid_load((const uint8_t *)lxr, lx_idx, &sz);
+ if (!vpd) {
+ prerror("VPD: Failed to load VPD LID\n");
+ } else {
+ dt_add_property(hub_node, "ibm,io-vpd", vpd, sz);
+ free(vpd);
+ }
+}
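A note on the timebase conversion above: timespec_to_tb() computes ns * tb_hz / 10^9 entirely in 64-bit arithmetic by pre-shifting both tb_hz and the 10^9 divisor right by 24 bits, trading a little precision for not needing 128-bit math. A minimal hosted-C sketch of the same trick, assuming an LP64 target and a 512MHz timebase; the __uint128_t (a GCC/clang extension) is used only to show the exact value for comparison, and nothing here is skiboot API:

#include <stdio.h>

int main(void)
{
	const unsigned long tb_hz = 512000000ul; /* assumed timebase frequency */
	const unsigned long ns = 3000000000ul;   /* a 3-second delay */

	/* The shortcut: shift both terms right by 24 bits so the
	 * multiply fits comfortably in 64 bits (512e6 >> 24 == 30). */
	unsigned long approx = (ns * (tb_hz >> 24)) / (1000000000ul >> 24);

	/* Exact value via 128-bit math, shown only to illustrate the
	 * size of the rounding error (under 1% for this input). */
	unsigned long exact = (unsigned long)(((__uint128_t)ns * tb_hz)
					      / 1000000000ul);

	printf("approx=%lu ticks, exact=%lu ticks\n", approx, exact);
	return 0;
}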
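trace_add() leans on two ring-buffer conventions: tb->start and tb->end are free-running 64-bit byte offsets reduced with "& mask" (the nominal size is a power of two), and the backing buffer is oversized by the largest record so every write is one contiguous memcpy even when it straddles the nominal end. A standalone sketch of that indexing scheme, with fixed-size records for simplicity; all names here are illustrative, not skiboot's:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define RB_SZ 64   /* nominal size; must be a power of two */
#define RB_MAX 16  /* largest record we will ever write */
#define REC_LEN 12 /* fixed record size for this demo */

struct ring {
	uint64_t start, end;      /* free-running byte offsets */
	char buf[RB_SZ + RB_MAX]; /* oversized: a write never wraps */
};

static void rb_add(struct ring *rb, const char *rec)
{
	/* Retire the oldest records until the new one fits in RB_SZ
	 * bytes (skiboot walks real record headers here instead). */
	while (rb->start + RB_SZ < rb->end + REC_LEN)
		rb->start += REC_LEN;

	/* One contiguous copy: when (end & mask) is near the top, the
	 * RB_MAX slack past the nominal end absorbs the overhang. */
	memcpy(rb->buf + (rb->end & (RB_SZ - 1)), rec, REC_LEN);
	rb->end += REC_LEN;
}

int main(void)
{
	struct ring rb = { 0, 0, { 0 } };
	char rec[REC_LEN];

	memset(rec, 'x', sizeof(rec));
	for (int i = 0; i < 1000; i++)
		rb_add(&rb, rec);

	/* The live window never exceeds the nominal size. */
	assert(rb.end - rb.start <= RB_SZ);
	printf("start=%llu end=%llu\n",
	       (unsigned long long)rb.start, (unsigned long long)rb.end);
	return 0;
}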
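handle_repeat() is consecutive-duplicate compression: when a new record matches the previous one, a small TRACE_REPEAT record is appended, or its counter bumped, saturating at 0xFFFF, instead of storing the duplicate in full. The same idea in a deliberately simplified form, using a flat array rather than skiboot's ring buffer and barriers; names are illustrative:

#include <stdio.h>
#include <string.h>

struct entry {
	char msg[16];
	unsigned int repeat; /* extra occurrences beyond the first */
};

static void log_add(struct entry *log, size_t *n, const char *msg)
{
	/* Duplicate of the last entry: bump its counter instead of
	 * storing it again, saturating like skiboot's rpt->num. */
	if (*n && strcmp(log[*n - 1].msg, msg) == 0
	    && log[*n - 1].repeat < 0xFFFF) {
		log[*n - 1].repeat++;
		return;
	}
	snprintf(log[*n].msg, sizeof(log[*n].msg), "%s", msg);
	log[*n].repeat = 0;
	(*n)++;
}

int main(void)
{
	struct entry log[8];
	size_t n = 0;

	log_add(log, &n, "tick");
	log_add(log, &n, "tick");
	log_add(log, &n, "tock");
	for (size_t i = 0; i < n; i++)
		printf("%s x%u\n", log[i].msg, log[i].repeat + 1);
	return 0;
}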
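The VPD scanners in core/vpd.c assume the framing visible in the code: a record opens with an 0x84 tag and a two-byte little-endian length, carries a payload of keywords (two ASCII characters, a one-byte length, then data, with "RT" naming the record), and closes with 0x78. A self-contained sketch that hand-builds one keyword payload and scans it the same way vpd_find_keyword() does, re-implemented here so it compiles outside skiboot:

#include <stdio.h>
#include <stdint.h>
#include <string.h>

/* Same scan as vpd_find_keyword(): a keyword is two ASCII characters
 * followed by a one-byte payload length. */
static const void *find_kw(const void *rec, size_t rec_sz,
			   const char *kw, uint8_t *kw_size)
{
	const uint8_t *p = rec, *end = p + rec_sz;

	while (end - p >= 3) {
		uint8_t k1 = *(p++), k2 = *(p++), sz = *(p++);

		if (k1 == kw[0] && k2 == kw[1]) {
			if (kw_size)
				*kw_size = sz;
			return p;
		}
		p += sz;
	}
	return NULL;
}

int main(void)
{
	/* Synthetic record payload: an RT keyword naming the record,
	 * then an FC keyword. Hand-built for illustration only. */
	static const uint8_t rec[] = {
		'R', 'T', 4, 'V', 'I', 'N', 'I', /* record name */
		'F', 'C', 3, 'A', 'B', 'C',      /* some keyword */
	};
	uint8_t sz;
	const uint8_t *fc = find_kw(rec, sizeof(rec), "FC", &sz);

	if (fc)
		printf("FC = %.*s\n", sz, (const char *)fc); /* FC = ABC */
	return 0;
}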
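Finally, the LID-number guess in vpd_lid_load() packs the last three hex digits of the LX keyword into 0x80e00000, matching the "4 digits, limited to fff" correction in its comment. A two-line illustration with a made-up LX payload:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Hypothetical 8-byte LX keyword payload; only the low nibble
	 * of byte 6 and all of byte 7 feed into the LID number. */
	const uint8_t lx[8] = { 0, 0, 0, 0, 0, 0, 0x01, 0x23 };
	uint32_t lid_no = 0x80e00000 | ((lx[6] & 0xf) << 8) | lx[7];

	printf("LID 0x%08x\n", lid_no); /* -> LID 0x80e00123 */
	return 0;
}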