/* Copyright 2013-2014 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * 	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * TODO: Index array by PIR to be able to catch them easily
 * from assembly such as machine checks etc...
 */
#include <skiboot.h>
#include <cpu.h>
#include <device.h>
#include <opal.h>
#include <stack.h>
#include <trace.h>
#include <affinity.h>
#include <chip.h>
#include <timebase.h>
#include <ccan/str/str.h>
#include <ccan/container_of/container_of.h>

/* The cpu_threads array is static and indexed by PIR in
 * order to speed up lookup from asm entry points
 */
struct cpu_stack {
	union {
		uint8_t	stack[STACK_SIZE];
		struct cpu_thread cpu;
	};
} __align(STACK_SIZE);

static struct cpu_stack *cpu_stacks = (struct cpu_stack *)CPU_STACKS_BASE;
unsigned int cpu_thread_count;
unsigned int cpu_max_pir;
struct cpu_thread *boot_cpu;
static struct lock reinit_lock = LOCK_UNLOCKED;

unsigned long cpu_secondary_start __force_data = 0;

struct cpu_job {
	struct list_node	link;
	void			(*func)(void *data);
	void			*data;
	bool			complete;
	bool			no_return;
};

/* attribute const as cpu_stacks is constant. */
unsigned long __attrconst cpu_stack_bottom(unsigned int pir)
{
	return ((unsigned long)&cpu_stacks[pir]) +
		sizeof(struct cpu_thread) + STACK_SAFETY_GAP;
}

unsigned long __attrconst cpu_stack_top(unsigned int pir)
{
	/* This is the top of the MC stack which is above the normal
	 * stack, which means a SP between cpu_stack_bottom() and
	 * cpu_stack_top() can either be a normal stack pointer or
	 * a Machine Check stack pointer
	 */
	return ((unsigned long)&cpu_stacks[pir]) +
		NORMAL_STACK_SIZE - STACK_TOP_GAP;
}

void __nomcount cpu_relax(void)
{
	/* Relax a bit to give sibling threads some breathing space */
	smt_low();
	smt_very_low();
	asm volatile("nop; nop; nop; nop\n");
	asm volatile("nop; nop; nop; nop\n");
	asm volatile("nop; nop; nop; nop\n");
	asm volatile("nop; nop; nop; nop\n");
	smt_medium();
}

struct cpu_job *__cpu_queue_job(struct cpu_thread *cpu,
				void (*func)(void *data), void *data,
				bool no_return)
{
	struct cpu_job *job;

	if (!cpu_is_available(cpu)) {
		prerror("CPU: Tried to queue job on unavailable CPU 0x%04x\n",
			cpu->pir);
		return NULL;
	}

	job = zalloc(sizeof(struct cpu_job));
	if (!job)
		return NULL;
	job->func = func;
	job->data = data;
	job->complete = false;
	job->no_return = no_return;

	if (cpu != this_cpu()) {
		lock(&cpu->job_lock);
		list_add_tail(&cpu->job_queue, &job->link);
		unlock(&cpu->job_lock);
	} else {
		func(data);
		job->complete = true;
	}

	/* XXX Add poking of CPU with interrupt */

	return job;
}

bool cpu_poll_job(struct cpu_job *job)
{
	lwsync();
	return job->complete;
}

void cpu_wait_job(struct cpu_job *job, bool free_it)
{
	unsigned long ticks = usecs_to_tb(5);

	if (!job)
		return;

	while(!job->complete) {
		time_wait(ticks);
		lwsync();
	}
	lwsync();
	smt_medium();

	if (free_it)
		free(job);
}

void cpu_free_job(struct cpu_job *job)
{
	if (!job)
		return;

	assert(job->complete);
	free(job);
}
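
/*
 * Typical use of the job API from a caller's point of view (an
 * illustrative sketch only; "do_work", "target" and "my_data" are
 * made-up names, and cpu_queue_job() is the no_return = false wrapper
 * used by cpu_change_all_hile() further down):
 *
 *	static void do_work(void *data)
 *	{
 *		// Runs on the target CPU from cpu_process_jobs()
 *	}
 *
 *	struct cpu_job *job = cpu_queue_job(target, do_work, my_data);
 *	if (job)
 *		cpu_wait_job(job, true);	// waits, then frees the job
 */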

void cpu_process_jobs(void)
{
	struct cpu_thread *cpu = this_cpu();
	struct cpu_job *job;
	void (*func)(void *);
	void *data;

	sync();
	if (list_empty(&cpu->job_queue))
		return;

	lock(&cpu->job_lock);
	while (true) {
		bool no_return;

		if (list_empty(&cpu->job_queue))
			break;
		smt_medium();
		job = list_pop(&cpu->job_queue, struct cpu_job, link);
		if (!job)
			break;
		func = job->func;
		data = job->data;
		no_return = job->no_return;
		unlock(&cpu->job_lock);
		if (no_return)
			free(job);
		func(data);
		lock(&cpu->job_lock);
		if (!no_return) {
			lwsync();
			job->complete = true;
		}
	}
	unlock(&cpu->job_lock);
}

struct dt_node *get_cpu_node(u32 pir)
{
	struct cpu_thread *t = find_cpu_by_pir(pir);

	return t ? t->node : NULL;
}

/* This only covers primary, active cpus */
struct cpu_thread *find_cpu_by_chip_id(u32 chip_id)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->is_secondary)
			continue;
		if (t->chip_id == chip_id)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_node(struct dt_node *cpu)
{
	struct cpu_thread *t;

	for_each_available_cpu(t) {
		if (t->node == cpu)
			return t;
	}
	return NULL;
}

struct cpu_thread *find_cpu_by_pir(u32 pir)
{
	if (pir > cpu_max_pir)
		return NULL;
	return &cpu_stacks[pir].cpu;
}

struct cpu_thread *find_cpu_by_server(u32 server_no)
{
	struct cpu_thread *t;

	for_each_cpu(t) {
		if (t->server_no == server_no)
			return t;
	}
	return NULL;
}

struct cpu_thread *next_cpu(struct cpu_thread *cpu)
{
	struct cpu_stack *s = container_of(cpu, struct cpu_stack, cpu);
	unsigned int index;

	if (cpu == NULL)
		index = 0;
	else
		index = s - cpu_stacks + 1;
	for (; index <= cpu_max_pir; index++) {
		cpu = &cpu_stacks[index].cpu;
		if (cpu->state != cpu_state_no_cpu)
			return cpu;
	}
	return NULL;
}

struct cpu_thread *first_cpu(void)
{
	return next_cpu(NULL);
}

struct cpu_thread *next_available_cpu(struct cpu_thread *cpu)
{
	do {
		cpu = next_cpu(cpu);
	} while(cpu && !cpu_is_available(cpu));

	return cpu;
}

struct cpu_thread *first_available_cpu(void)
{
	return next_available_cpu(NULL);
}

struct cpu_thread *next_available_core_in_chip(struct cpu_thread *core,
					       u32 chip_id)
{
	do {
		core = next_cpu(core);
	} while(core && (!cpu_is_available(core) ||
			 core->chip_id != chip_id ||
			 core->is_secondary));
	return core;
}

struct cpu_thread *first_available_core_in_chip(u32 chip_id)
{
	return next_available_core_in_chip(NULL, chip_id);
}

uint32_t cpu_get_core_index(struct cpu_thread *cpu)
{
	return pir_to_core_id(cpu->pir);
}

void cpu_remove_node(const struct cpu_thread *t)
{
	struct dt_node *i;

	/* Find this cpu node */
	dt_for_each_node(dt_root, i) {
		const struct dt_property *p;

		if (!dt_has_node_property(i, "device_type", "cpu"))
			continue;
		p = dt_find_property(i, "ibm,pir");
		if (dt_property_get_cell(p, 0) == t->pir) {
			dt_free(i);
			return;
		}
	}
	prerror("CPU: Could not find cpu node %i to remove!\n", t->pir);
	abort();
}

void cpu_disable_all_threads(struct cpu_thread *cpu)
{
	unsigned int i;

	for (i = 0; i <= cpu_max_pir; i++) {
		struct cpu_thread *t = &cpu_stacks[i].cpu;

		if (t->primary == cpu->primary)
			t->state = cpu_state_disabled;
	}

	/* XXX Do something to actually stop the core */
}

static void init_cpu_thread(struct cpu_thread *t,
			    enum cpu_thread_state state,
			    unsigned int pir)
{
	init_lock(&t->job_lock);
	list_head_init(&t->job_queue);
	t->state = state;
	t->pir = pir;
#ifdef STACK_CHECK_ENABLED
	t->stack_bot_mark = LONG_MAX;
#endif
	assert(pir == container_of(t, struct cpu_stack, cpu) - cpu_stacks);
}

void pre_init_boot_cpu(void)
{
	struct cpu_thread *cpu = this_cpu();

	memset(cpu, 0, sizeof(struct cpu_thread));
}
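
/*
 * The assert in init_cpu_thread() above captures the layout everything
 * here relies on: the struct cpu_thread of PIR n lives at the bottom of
 * that thread's stack, i.e. (roughly, assuming the definitions from the
 * stack headers):
 *
 *	&cpu_stacks[n]  == CPU_STACKS_BASE + n * STACK_SIZE
 *	thread n        == &cpu_stacks[n].cpu
 *
 * which is why find_cpu_by_pir() is a plain array index and why asm
 * entry points can recover their cpu_thread from the PIR alone.
 */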

void init_boot_cpu(void)
{
	unsigned int i, pir, pvr;

	pir = mfspr(SPR_PIR);
	pvr = mfspr(SPR_PVR);

	/* Get a CPU thread count and an initial max PIR based on PVR */
	switch(PVR_TYPE(pvr)) {
	case PVR_TYPE_P7:
	case PVR_TYPE_P7P:
		cpu_thread_count = 4;
		cpu_max_pir = SPR_PIR_P7_MASK;
		proc_gen = proc_gen_p7;
		prlog(PR_INFO, "CPU: P7 generation processor"
		      " (max %d threads/core)\n", cpu_thread_count);
		break;
	case PVR_TYPE_P8E:
	case PVR_TYPE_P8:
		cpu_thread_count = 8;
		cpu_max_pir = SPR_PIR_P8_MASK;
		proc_gen = proc_gen_p8;
		prlog(PR_INFO, "CPU: P8 generation processor"
		      " (max %d threads/core)\n", cpu_thread_count);
		break;
	default:
		prerror("CPU: Unknown PVR, assuming 1 thread\n");
		cpu_thread_count = 1;
		cpu_max_pir = mfspr(SPR_PIR);
		proc_gen = proc_gen_unknown;
	}

	prlog(PR_DEBUG, "CPU: Boot CPU PIR is 0x%04x PVR is 0x%08x\n",
	      pir, pvr);
	prlog(PR_DEBUG, "CPU: Initial max PIR set to 0x%x\n", cpu_max_pir);

	/* Clear the CPU structs */
	for (i = 0; i <= cpu_max_pir; i++)
		memset(&cpu_stacks[i].cpu, 0, sizeof(struct cpu_thread));

	/* Setup boot CPU state */
	boot_cpu = &cpu_stacks[pir].cpu;
	init_cpu_thread(boot_cpu, cpu_state_active, pir);
	init_boot_tracebuf(boot_cpu);
	assert(this_cpu() == boot_cpu);
}

void init_all_cpus(void)
{
	struct dt_node *cpus, *cpu;
	unsigned int thread, new_max_pir = 0;

	cpus = dt_find_by_path(dt_root, "/cpus");
	assert(cpus);

	/* Iterate all CPUs in the device-tree */
	dt_for_each_child(cpus, cpu) {
		unsigned int pir, server_no, chip_id;
		enum cpu_thread_state state;
		const struct dt_property *p;
		struct cpu_thread *t, *pt;

		/* Skip cache nodes */
		if (strcmp(dt_prop_get(cpu, "device_type"), "cpu"))
			continue;

		server_no = dt_prop_get_u32(cpu, "reg");

		/* If PIR property is absent, assume it's the same as the
		 * server number
		 */
		pir = dt_prop_get_u32_def(cpu, "ibm,pir", server_no);

		/* We should always have an ibm,chip-id property */
		chip_id = dt_get_chip_id(cpu);

		/* Only use operational CPUs */
		if (!strcmp(dt_prop_get(cpu, "status"), "okay"))
			state = cpu_state_present;
		else
			state = cpu_state_unavailable;

		prlog(PR_INFO, "CPU: CPU from DT PIR=0x%04x Server#=0x%x"
		      " State=%d\n", pir, server_no, state);

		/* Setup thread 0 */
		t = pt = &cpu_stacks[pir].cpu;
		if (t != boot_cpu) {
			init_cpu_thread(t, state, pir);
			/* Each cpu gets its own later in init_trace_buffers */
			t->trace = boot_cpu->trace;
		}
		t->server_no = server_no;
		t->primary = t;
		t->node = cpu;
		t->chip_id = chip_id;
		t->icp_regs = NULL; /* Will be set later */
		t->core_hmi_state = 0;
		t->core_hmi_state_ptr = &t->core_hmi_state;
		t->thread_mask = 1;

		/* Add associativity properties */
		add_core_associativity(t);

		/* Adjust max PIR */
		if (new_max_pir < (pir + cpu_thread_count - 1))
			new_max_pir = pir + cpu_thread_count - 1;

		/* Iterate threads */
		p = dt_find_property(cpu, "ibm,ppc-interrupt-server#s");
		if (!p)
			continue;
		for (thread = 1; thread < (p->len / 4); thread++) {
			prlog(PR_TRACE, "CPU: secondary thread %d found\n",
			      thread);
			t = &cpu_stacks[pir + thread].cpu;
			init_cpu_thread(t, state, pir + thread);
			t->trace = boot_cpu->trace;
			t->server_no = ((const u32 *)p->prop)[thread];
			t->is_secondary = true;
			t->primary = pt;
			t->node = cpu;
			t->chip_id = chip_id;
			t->core_hmi_state_ptr = &pt->core_hmi_state;
			t->thread_mask = 1 << thread;
		}
		prlog(PR_INFO, "CPU: %d secondary threads\n", thread);
	}
	cpu_max_pir = new_max_pir;
	prlog(PR_DEBUG, "CPU: New max PIR set to 0x%x\n", new_max_pir);
}
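
/*
 * For reference, a simplified /cpus child node of the kind
 * init_all_cpus() parses above; the property names match the code, the
 * node name and cell values are purely illustrative:
 *
 *	cpu@20 {
 *		device_type = "cpu";
 *		status = "okay";
 *		reg = <0x20>;
 *		ibm,pir = <0x20>;
 *		ibm,chip-id = <0x0>;
 *		ibm,ppc-interrupt-server#s = <0x20 0x21 0x22 0x23
 *					      0x24 0x25 0x26 0x27>;
 *	};
 *
 * Thread 0 becomes the primary cpu_thread; the remaining entries of
 * ibm,ppc-interrupt-server#s become its secondary threads.
 */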

void cpu_bringup(void)
{
	struct cpu_thread *t;

	prlog(PR_INFO, "CPU: Setting up secondary CPU state\n");

	op_display(OP_LOG, OP_MOD_CPU, 0x0000);

	/* Tell everybody to chime in ! */
	prlog(PR_INFO, "CPU: Calling in all processors...\n");
	cpu_secondary_start = 1;
	sync();

	op_display(OP_LOG, OP_MOD_CPU, 0x0002);

	for_each_cpu(t) {
		if (t->state != cpu_state_present &&
		    t->state != cpu_state_active)
			continue;

		/* Add a callin timeout ? If so, call cpu_remove_node(t). */
		while (t->state != cpu_state_active) {
			smt_very_low();
			sync();
		}
		smt_medium();
	}

	prlog(PR_INFO, "CPU: All processors called in...\n");

	op_display(OP_LOG, OP_MOD_CPU, 0x0003);
}

void cpu_callin(struct cpu_thread *cpu)
{
	cpu->state = cpu_state_active;
}

static void opal_start_thread_job(void *data)
{
	cpu_give_self_os();

	/* We do not return, so let's mark the job as
	 * complete
	 */
	start_kernel_secondary((uint64_t)data);
}

static int64_t opal_start_cpu_thread(uint64_t server_no, uint64_t start_address)
{
	struct cpu_thread *cpu;
	struct cpu_job *job;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Start invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	prlog(PR_DEBUG, "OPAL: Start CPU 0x%04llx (PIR 0x%04x) -> 0x%016llx\n",
	      server_no, cpu->pir, start_address);

	lock(&reinit_lock);
	if (!cpu_is_available(cpu)) {
		unlock(&reinit_lock);
		prerror("OPAL: CPU not active in OPAL !\n");
		return OPAL_WRONG_STATE;
	}
	job = __cpu_queue_job(cpu, opal_start_thread_job,
			      (void *)start_address, true);
	unlock(&reinit_lock);
	if (!job) {
		prerror("OPAL: Failed to create CPU start job !\n");
		return OPAL_INTERNAL_ERROR;
	}
	return OPAL_SUCCESS;
}
opal_call(OPAL_START_CPU, opal_start_cpu_thread, 2);

static int64_t opal_query_cpu_status(uint64_t server_no, uint8_t *thread_status)
{
	struct cpu_thread *cpu;

	cpu = find_cpu_by_server(server_no);
	if (!cpu) {
		prerror("OPAL: Query invalid CPU 0x%04llx !\n", server_no);
		return OPAL_PARAMETER;
	}
	if (!cpu_is_available(cpu) && cpu->state != cpu_state_os) {
		prerror("OPAL: CPU not active in OPAL nor OS !\n");
		return OPAL_PARAMETER;
	}
	switch(cpu->state) {
	case cpu_state_os:
		*thread_status = OPAL_THREAD_STARTED;
		break;
	case cpu_state_active:
		/* Active in skiboot -> inactive in OS */
		*thread_status = OPAL_THREAD_INACTIVE;
		break;
	default:
		*thread_status = OPAL_THREAD_UNAVAILABLE;
	}

	return OPAL_SUCCESS;
}
opal_call(OPAL_QUERY_CPU_STATUS, opal_query_cpu_status, 2);

static int64_t opal_return_cpu(void)
{
	prlog(PR_DEBUG, "OPAL: Returning CPU 0x%04x\n", this_cpu()->pir);

	__secondary_cpu_entry();

	return OPAL_HARDWARE; /* Should not happen */
}
opal_call(OPAL_RETURN_CPU, opal_return_cpu, 0);

static void cpu_change_hile(void *hilep)
{
	bool hile = *(bool *)hilep;
	unsigned long hid0;

	hid0 = mfspr(SPR_HID0);
	if (hile)
		hid0 |= SPR_HID0_HILE;
	else
		hid0 &= ~SPR_HID0_HILE;
	prlog(PR_DEBUG, "CPU: [%08x] HID0 set to 0x%016lx\n",
	      this_cpu()->pir, hid0);
	set_hid0(hid0);

	this_cpu()->current_hile = hile;
}

static int64_t cpu_change_all_hile(bool hile)
{
	struct cpu_thread *cpu;

	prlog(PR_INFO, "CPU: Switching HILE on all CPUs to %d\n", hile);

	for_each_available_cpu(cpu) {
		if (cpu->current_hile == hile)
			continue;
		if (cpu == this_cpu()) {
			cpu_change_hile(&hile);
			continue;
		}
		cpu_wait_job(cpu_queue_job(cpu, cpu_change_hile, &hile), true);
	}
	return OPAL_SUCCESS;
}
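
/*
 * cpu_change_all_hile() is only driven by opal_reinit_cpus() below:
 * when the OS passes OPAL_REINIT_CPUS_HILE_LE or OPAL_REINIT_CPUS_HILE_BE,
 * every available CPU flips SPR_HID0_HILE so that interrupts are taken
 * in the requested endian. The calling CPU updates its own HID0
 * directly; every other CPU gets a cpu_queue_job() running
 * cpu_change_hile() on its behalf.
 */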
various "for_each_active_..." calls done by slw_reinit() */ this_cpu()->state = cpu_state_active; /* * If the flags affect endianness and we are on P8 DD2 or later, then * use the HID bit. We use the PVR (we could use the EC level in * the chip but the PVR is more readily available). */ if (proc_gen == proc_gen_p8 && PVR_VERS_MAJ(mfspr(SPR_PVR)) >= 2 && (flags & (OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE))) { bool hile = !!(flags & OPAL_REINIT_CPUS_HILE_LE); flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE); rc = cpu_change_all_hile(hile); } /* If we have a P7, error out for LE switch, do nothing for BE */ if (proc_gen < proc_gen_p8) { if (flags & OPAL_REINIT_CPUS_HILE_LE) rc = OPAL_UNSUPPORTED; flags &= ~(OPAL_REINIT_CPUS_HILE_BE | OPAL_REINIT_CPUS_HILE_LE); } /* Any flags left ? */ if (flags != 0) rc = slw_reinit(flags); /* And undo the above */ this_cpu()->state = cpu_state_os; bail: unlock(&reinit_lock); return rc; } opal_call(OPAL_REINIT_CPUS, opal_reinit_cpus, 1);