aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--sim/mips/sky-pke.c1262
-rw-r--r--sim/mips/sky-pke.h290
2 files changed, 1457 insertions, 95 deletions
diff --git a/sim/mips/sky-pke.c b/sim/mips/sky-pke.c
index b90d196..94b6502 100644
--- a/sim/mips/sky-pke.c
+++ b/sim/mips/sky-pke.c
@@ -1,7 +1,12 @@
/* Copyright (C) 1998, Cygnus Solutions */
-#include "sky-pke.h"
#include <stdlib.h>
+#include "sky-pke.h"
+#include "sky-dma.h"
+#include "sim-assert.h"
+#include "sky-vu0.h"
+#include "sky-vu1.h"
+#include "sky-gpuif.h"
/* Imported functions */
@@ -16,6 +21,13 @@ static int pke_io_read_buffer(device*, void*, int, address_word,
static int pke_io_write_buffer(device*, const void*, int, address_word,
unsigned, sim_cpu*, sim_cia);
static void pke_issue(struct pke_device*);
+static void pke_pc_advance(struct pke_device*, int num_words);
+static unsigned_4* pke_pc_operand(struct pke_device*, int word_num);
+static struct fifo_quadword* pke_pc_fifo(struct pke_device*, int word_num);
+static int pke_track_write(struct pke_device*, const void* src, int len,
+ address_word dest, unsigned_4 sourceaddr);
+static void pke_attach(SIM_DESC sd, struct pke_device* me);
+
/* Static data */
@@ -24,10 +36,9 @@ struct pke_device pke0_device =
{
{ "pke0", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
0, 0, /* ID, flags */
- PKE0_REGISTER_WINDOW_START, PKE0_FIFO_START, /* memory-mapping addresses */
{}, /* regs */
NULL, 0, 0, NULL, /* FIFO */
- 0 /* pc */
+ 0, 0 /* pc */
};
@@ -35,10 +46,9 @@ struct pke_device pke1_device =
{
{ "pke1", &pke_io_read_buffer, &pke_io_write_buffer }, /* device */
1, 0, /* ID, flags */
- PKE1_REGISTER_WINDOW_START, PKE1_FIFO_START, /* memory-mapping addresses */
{}, /* regs */
NULL, 0, 0, NULL, /* FIFO */
- 0 /* pc */
+ 0, 0 /* pc */
};
@@ -46,86 +56,85 @@ struct pke_device pke1_device =
/* External functions */
-/* Attach PKE0 addresses to main memory */
+/* Attach PKE addresses to main memory */
void
pke0_attach(SIM_DESC sd)
{
- sim_core_attach (sd,
- NULL,
- 0 /*level*/,
- access_read_write,
- 0 /*space ???*/,
- pke0_device.register_memory_addr,
- PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
- 0 /*modulo*/,
- (device*) &pke0_device,
- NULL /*buffer*/);
+ pke_attach(sd, & pke0_device);
+}
- sim_core_attach (sd,
- NULL,
- 0 /*level*/,
- access_read_write,
- 0 /*space ???*/,
- pke0_device.fifo_memory_addr,
- sizeof(quadword) /*nr_bytes*/,
- 0 /*modulo*/,
- (device*) &pke1_device,
- NULL /*buffer*/);
+void
+pke1_attach(SIM_DESC sd)
+{
+ pke_attach(sd, & pke1_device);
}
-/* Attach PKE1 addresses to main memory */
+
+/* Issue a PKE instruction if possible */
void
-pke1_attach(SIM_DESC sd)
+pke0_issue()
{
+ pke_issue(& pke0_device);
+}
+
+void
+pke1_issue()
+{
+ pke_issue(& pke1_device); /* PKE1 entry point must drive the PKE1 device, not PKE0 */
+}
+
+
+
+/* Internal functions */
+
+
+/* Attach PKE memory regions to simulator */
+
+void
+pke_attach(SIM_DESC sd, struct pke_device* me)
+{
+ /* register file */
sim_core_attach (sd,
NULL,
0 /*level*/,
access_read_write,
0 /*space ???*/,
- pke1_device.register_memory_addr,
+ (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START,
PKE_REGISTER_WINDOW_SIZE /*nr_bytes*/,
0 /*modulo*/,
- (device*) &pke1_device,
+ (device*) me, /* dispatch register accesses to the unit being attached, not a fixed PKE */
NULL /*buffer*/);
+ /* FIFO port */
sim_core_attach (sd,
NULL,
0 /*level*/,
access_read_write,
0 /*space ???*/,
- pke1_device.fifo_memory_addr,
+ (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR,
sizeof(quadword) /*nr_bytes*/,
0 /*modulo*/,
- (device*) &pke1_device,
+ (device*) me, /* dispatch FIFO accesses to the unit being attached, not a fixed PKE */
NULL /*buffer*/);
-}
-
-
-/* Issue a PKE0 instruction if possible */
-
-void
-pke0_issue()
-{
- pke_issue(& pke0_device);
-}
-
-
-/* Issue a PKE1 instruction if possible */
-void
-pke1_issue()
-{
- pke_issue(& pke0_device);
+ /* source-addr tracking word */
+ sim_core_attach (sd,
+ NULL,
+ 0 /*level*/,
+ access_read_write,
+ 0 /*space ???*/,
+ (me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR,
+ sizeof(unsigned_4) /*nr_bytes*/,
+ 0 /*modulo*/,
+ NULL,
+ zalloc(sizeof(unsigned_4)) /*buffer*/);
}
-/* Internal functions */
-
-
/* Handle a PKE read; return no. of bytes read */
int
@@ -134,12 +143,18 @@ pke_io_read_buffer(device *me_,
int space,
address_word addr,
unsigned nr_bytes,
- sim_cpu *processor,
+ sim_cpu *cpu,
sim_cia cia)
{
/* downcast to gather embedding pke_device struct */
struct pke_device* me = (struct pke_device*) me_;
+ /* find my address ranges */
+ address_word my_reg_start =
+ (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
+ address_word my_fifo_addr =
+ (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;
+
/* enforce that an access does not span more than one quadword */
address_word low = ADDR_TRUNC_QW(addr);
address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
@@ -147,33 +162,66 @@ pke_io_read_buffer(device *me_,
return 0;
/* classify address & handle */
- if(addr >= me->register_memory_addr &&
- addr < me->register_memory_addr + PKE_REGISTER_WINDOW_SIZE)
+ if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
{
/* register bank */
- int reg_num = ADDR_TRUNC_QW(addr - me->register_memory_addr) >> 4;
+ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
+ int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
int readable = 1;
+ quadword result;
+
+ /* clear result */
+ result[0] = result[1] = result[2] = result[3] = 0;
- /* ensure readibility of register: all okay except PKE1-only ones read on PKE0 */
+ /* handle reads to individual registers; clear `readable' on error */
switch(reg_num)
{
+ /* handle common case of register reading, side-effect free */
+ /* PKE1-only registers*/
case PKE_REG_BASE:
case PKE_REG_OFST:
case PKE_REG_TOPS:
case PKE_REG_TOP:
case PKE_REG_DBF:
- if(me->pke_number == 0) /* PKE0 cannot access these registers */
+ if(me->pke_number == 0)
readable = 0;
+ /* fall through */
+ /* PKE0 & PKE1 common registers*/
+ case PKE_REG_STAT:
+ case PKE_REG_ERR:
+ case PKE_REG_MARK:
+ case PKE_REG_CYCLE:
+ case PKE_REG_MODE:
+ case PKE_REG_NUM:
+ case PKE_REG_MASK:
+ case PKE_REG_CODE:
+ case PKE_REG_ITOPS:
+ case PKE_REG_ITOP:
+ case PKE_REG_R0:
+ case PKE_REG_R1:
+ case PKE_REG_R2:
+ case PKE_REG_R3:
+ case PKE_REG_C0:
+ case PKE_REG_C1:
+ case PKE_REG_C2:
+ case PKE_REG_C3:
+ result[0] = me->regs[reg_num][0];
+ break;
+
+ /* handle common case of write-only registers */
+ case PKE_REG_FBRST:
+ readable = 0;
+ break;
+
+ default:
+ ASSERT(0); /* test above should prevent this possibility */
}
- /* perform read & return */
+ /* perform transfer & return */
if(readable)
{
- /* find byte-offset inside register bank */
- int reg_byte = ADDR_OFFSET_QW(addr);
- void* src = ((unsigned_1*) (& me->regs[reg_num])) + reg_byte;
/* copy the bits */
- memcpy(dest, src, nr_bytes);
+ memcpy(dest, ((unsigned_1*) &result) + reg_byte, nr_bytes);
/* okay */
return nr_bytes;
}
@@ -185,16 +233,18 @@ pke_io_read_buffer(device *me_,
/* NOTREACHED */
}
- else if(addr >= me->fifo_memory_addr &&
- addr < me->fifo_memory_addr + sizeof(quadword))
+ else if(addr >= my_fifo_addr &&
+ addr < my_fifo_addr + sizeof(quadword))
{
/* FIFO */
- /* XXX: FIFO is not readable. */
- return 0;
+ /* FIFO is not readable: return a word of zeroes */
+ memset(dest, 0, nr_bytes);
+ return nr_bytes;
}
/* NOTREACHED */
+ return 0;
}
@@ -206,12 +256,18 @@ pke_io_write_buffer(device *me_,
int space,
address_word addr,
unsigned nr_bytes,
- sim_cpu *processor,
+ sim_cpu *cpu,
sim_cia cia)
{
/* downcast to gather embedding pke_device struct */
struct pke_device* me = (struct pke_device*) me_;
+ /* find my address ranges */
+ address_word my_reg_start =
+ (me->pke_number == 0) ? PKE0_REGISTER_WINDOW_START : PKE1_REGISTER_WINDOW_START;
+ address_word my_fifo_addr =
+ (me->pke_number == 0) ? PKE0_FIFO_ADDR : PKE1_FIFO_ADDR;
+
/* enforce that an access does not span more than one quadword */
address_word low = ADDR_TRUNC_QW(addr);
address_word high = ADDR_TRUNC_QW(addr + nr_bytes - 1);
@@ -219,33 +275,108 @@ pke_io_write_buffer(device *me_,
return 0;
/* classify address & handle */
- if(addr >= me->register_memory_addr &&
- addr < me->register_memory_addr + PKE_REGISTER_WINDOW_SIZE)
+ if((addr >= my_reg_start) && (addr < my_reg_start + PKE_REGISTER_WINDOW_SIZE))
{
/* register bank */
- int reg_num = ADDR_TRUNC_QW(addr - me->register_memory_addr) >> 4;
+ int reg_num = ADDR_TRUNC_QW(addr - my_reg_start) >> 4;
+ int reg_byte = ADDR_OFFSET_QW(addr); /* find byte-offset inside register bank */
int writeable = 1;
+ quadword input;
+
+ /* clear input */
+ input[0] = input[1] = input[2] = input[3] = 0;
- /* ensure readibility of register: all okay except PKE1-only ones read on PKE0 */
+ /* write user-given bytes into input */
+ memcpy(((unsigned_1*) &input) + reg_byte, src, nr_bytes);
+
+ /* handle writes to individual registers; clear `writeable' on error */
switch(reg_num)
{
+ case PKE_REG_FBRST:
+ /* XXX: order of evaluation? STP && STC ?? */
+ if(BIT_MASK_GET(input[0], 0, 0)) /* RST bit */
+ {
+ /* clear FIFO: also prevents re-execution attempt of
+ possible stalled instruction */
+ me->fifo_num_elements = me->fifo_pc;
+ /* clear registers */
+ memset(me->regs, 0, sizeof(me->regs));
+ me->flags = 0;
+ me->qw_pc = 0;
+ }
+ if(BIT_MASK_GET(input[0], 1, 1)) /* FBK bit */
+ {
+ PKE_REG_MASK_SET(me, STAT, PFS, 1);
+ }
+ if(BIT_MASK_GET(input[0], 2, 2)) /* STP bit */
+ {
+ /* XXX: how to safely abort "currently executing" (=> stalled) instruction? */
+ PKE_REG_MASK_SET(me, STAT, PSS, 1);
+ }
+ if(BIT_MASK_GET(input[0], 3, 3)) /* STC bit (bit 3; bit 2 is STP, tested above) */
+ {
+ /* clear a bunch of status bits */
+ PKE_REG_MASK_SET(me, STAT, PSS, 0);
+ PKE_REG_MASK_SET(me, STAT, PFS, 0);
+ PKE_REG_MASK_SET(me, STAT, PIS, 0);
+ PKE_REG_MASK_SET(me, STAT, INT, 0);
+ PKE_REG_MASK_SET(me, STAT, ER0, 0);
+ PKE_REG_MASK_SET(me, STAT, ER1, 0);
+ /* will allow resumption of possible stalled instruction */
+ }
+ break;
+
+ case PKE_REG_ERR:
+ /* copy bottom three bits */
+ BIT_MASK_SET(me->regs[PKE_REG_ERR][0], 0, 2, BIT_MASK_GET(input[0], 0, 2));
+ break;
+
+ case PKE_REG_MARK:
+ /* copy bottom sixteen bits */
+ PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(input[0], 0, 15));
+ /* reset MRK bit in STAT */
+ PKE_REG_MASK_SET(me, STAT, MRK, 0);
+ break;
+
+ /* handle common case of read-only registers */
+ /* PKE1-only registers - not really necessary to handle separately */
case PKE_REG_BASE:
case PKE_REG_OFST:
case PKE_REG_TOPS:
case PKE_REG_TOP:
case PKE_REG_DBF:
- if(me->pke_number == 0) /* PKE0 cannot access these registers */
+ if(me->pke_number == 0)
writeable = 0;
+ /* fall through */
+ /* PKE0 & PKE1 common registers*/
+ case PKE_REG_STAT:
+ /* ignore FDR bit for PKE1_STAT -- simulator does not implement PKE->RAM transfers */
+ case PKE_REG_CYCLE:
+ case PKE_REG_MODE:
+ case PKE_REG_NUM:
+ case PKE_REG_MASK:
+ case PKE_REG_CODE:
+ case PKE_REG_ITOPS:
+ case PKE_REG_ITOP:
+ case PKE_REG_R0:
+ case PKE_REG_R1:
+ case PKE_REG_R2:
+ case PKE_REG_R3:
+ case PKE_REG_C0:
+ case PKE_REG_C1:
+ case PKE_REG_C2:
+ case PKE_REG_C3:
+ writeable = 0;
+ break;
+
+ default:
+ ASSERT(0); /* test above should prevent this possibility */
}
- /* perform write & return */
- if(writeable)
+ /* perform return */
+ if(writeable)
{
- /* find byte-offset inside register bank */
- int reg_byte = ADDR_OFFSET_QW(addr);
- void* dest = ((unsigned_1*) (& me->regs[reg_num])) + reg_byte;
- /* copy the bits */
- memcpy(dest, src, nr_bytes);
+ /* okay */
return nr_bytes;
}
else
@@ -256,10 +387,11 @@ pke_io_write_buffer(device *me_,
/* NOTREACHED */
}
- else if(addr >= me->fifo_memory_addr &&
- addr < me->fifo_memory_addr + sizeof(quadword))
+ else if(addr >= my_fifo_addr &&
+ addr < my_fifo_addr + sizeof(quadword))
{
/* FIFO */
+ struct fifo_quadword* fqw;
/* assert transfer size == 128 bits */
if(nr_bytes != sizeof(quadword))
@@ -283,24 +415,988 @@ pke_io_write_buffer(device *me_,
}
/* add new quadword at end of FIFO */
- memcpy(& me->fifo[++me->fifo_num_elements], src, nr_bytes);
+ fqw = & me->fifo[me->fifo_num_elements];
+ memcpy((void*) fqw->data, src, nr_bytes);
+ sim_read(CPU_STATE(cpu),
+ (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_SRCADDR : DMA_CHANNEL1_SRCADDR),
+ (void*) & fqw->source_address,
+ sizeof(address_word));
+ sim_read(CPU_STATE(cpu),
+ (SIM_ADDR) (me->pke_number == 0 ? DMA_CHANNEL0_PKTFLAG : DMA_CHANNEL1_PKTFLAG),
+ (void*) & fqw->dma_tag_present,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ me->fifo_num_elements++;
+
+ /* set FQC to "1" as FIFO is now not empty */
+ PKE_REG_MASK_SET(me, STAT, FQC, 1);
/* okay */
return nr_bytes;
}
/* NOTREACHED */
+ return 0;
}
-/* Issue & swallow one PKE opcode if possible */
+/* Issue & swallow next PKE opcode if possible/available */
void
pke_issue(struct pke_device* me)
{
+ struct fifo_quadword* fqw;
+ unsigned_4 fw;
+ unsigned_4 cmd, intr, num;
+ unsigned_4 imm;
+ int next_pps_state; /* PPS after this instruction issue attempt */
+
+ /* 1 -- test go / no-go for PKE execution */
+
+ /* check for stall/halt control bits */
+ /* XXX: What is the PEW bit for? */
+ if(PKE_REG_MASK_GET(me, STAT, PSS) ||
+ PKE_REG_MASK_GET(me, STAT, PFS) ||
+ /* maskable stall controls: ER0, ER1, PIS */
+ (PKE_REG_MASK_GET(me, STAT, ER0) && !PKE_REG_MASK_GET(me, ERR, ME0)) ||
+ (PKE_REG_MASK_GET(me, STAT, ER1) && !PKE_REG_MASK_GET(me, ERR, ME1)) ||
+ (PKE_REG_MASK_GET(me, STAT, PIS) && !PKE_REG_MASK_GET(me, ERR, MII)))
+ {
+ /* XXX */
+ }
+ /* XXX: handle PSS by *skipping* instruction? */
+
+ /* confirm availability of new quadword of PKE instructions */
+ if(me->fifo_num_elements <= me->fifo_pc)
+ return;
+
+
+ /* 2 -- fetch PKE instruction */
+
+ /* "fetch" instruction quadword */
+ fqw = & me->fifo[me->fifo_pc];
+
+ /* skip over DMA tags, if present */
+ if((fqw->dma_tag_present != 0) && (me->qw_pc < 2))
+ {
+ ASSERT(me->qw_pc == 0);
+ /* XXX: check validity of DMA tag; if bad, set ER0 flag */
+ me->qw_pc = 2;
+ }
+
+ /* "fetch" instruction word */
+ fw = fqw->data[me->qw_pc];
+
+ /* store it in PKECODE register */
+ me->regs[PKE_REG_CODE][0] = fw;
+
+
+ /* 3 -- decode PKE instruction */
+
+ /* PKE instruction format: [intr 0:0][pke-command 6:0][num 7:0][immediate 15:0],
+ so op-code is in top byte. */
+ intr = BIT_MASK_GET(fw, PKE_OPCODE_I_B, PKE_OPCODE_I_E);
+ cmd = BIT_MASK_GET(fw, PKE_OPCODE_CMD_B, PKE_OPCODE_CMD_E);
+ num = BIT_MASK_GET(fw, PKE_OPCODE_NUM_B, PKE_OPCODE_NUM_E);
+ imm = BIT_MASK_GET(fw, PKE_OPCODE_IMM_B, PKE_OPCODE_IMM_E);
+
+ if(intr)
+ {
+ /* set INT flag in STAT register */
+ PKE_REG_MASK_SET(me, STAT, INT, 1);
+ /* XXX: send interrupt to R5900? */
+ }
+
+ /* decoding */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_DECODE);
+ next_pps_state = PKE_REG_STAT_PPS_IDLE; /* assume instruction completes */
+
+ /* decode */
+ if(IS_PKE_CMD(cmd, PKENOP))
+ {
+ /* no work required, yey */
+ pke_pc_advance(me, 1);
+ }
+ else if(IS_PKE_CMD(cmd, STCYCL))
+ {
+ /* copy immediate value into CYCLE reg */
+ me->regs[PKE_REG_CYCLE][0] = imm;
+ pke_pc_advance(me, 1);
+ }
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, OFFSET))
+ {
+ /* copy 10 bits to OFFSET field */
+ PKE_REG_MASK_SET(me, OFST, OFFSET, BIT_MASK_GET(imm, 0, 9));
+ /* clear DBF bit */
+ PKE_REG_MASK_SET(me, DBF, DF, 0);
+ /* clear other DBF bit */
+ PKE_REG_MASK_SET(me, STAT, DBF, 0);
+ /* set TOPS = BASE */
+ PKE_REG_MASK_SET(me, TOPS, TOPS,
+ PKE_REG_MASK_GET(me, BASE, BASE));
+ pke_pc_advance(me, 1);
+ }
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, BASE))
+ {
+ /* copy 10 bits to BASE field */
+ PKE_REG_MASK_SET(me, BASE, BASE, BIT_MASK_GET(imm, 0, 9));
+ /* clear DBF bit */
+ PKE_REG_MASK_SET(me, DBF, DF, 0);
+ /* clear other DBF bit */
+ PKE_REG_MASK_SET(me, STAT, DBF, 0);
+ /* set TOPS = BASE */
+ PKE_REG_MASK_SET(me, TOPS, TOPS,
+ PKE_REG_MASK_GET(me, BASE, BASE));
+ pke_pc_advance(me, 1);
+ }
+ else if(IS_PKE_CMD(cmd, ITOP))
+ {
+ /* copy 10 bits to ITOPS field */
+ PKE_REG_MASK_SET(me, ITOPS, ITOPS, BIT_MASK_GET(imm, 0, 9));
+ pke_pc_advance(me, 1);
+ }
+ else if(IS_PKE_CMD(cmd, STMOD))
+ {
+ /* copy 2 bits to MODE register */
+ PKE_REG_MASK_SET(me, MODE, MDE, BIT_MASK_GET(imm, 0, 2));
+ pke_pc_advance(me, 1);
+ }
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, MSKPATH3)) /* MSKPATH3 */
+ {
+ /* XXX: what to do with this? DMA control register? */
+ pke_pc_advance(me, 1);
+ }
+ else if(IS_PKE_CMD(cmd, PKEMARK))
+ {
+ /* copy 16 bits to MARK register */
+ PKE_REG_MASK_SET(me, MARK, MARK, BIT_MASK_GET(imm, 0, 15));
+ /* set MRK bit in STAT register - CPU2 v2.1 docs incorrect */
+ PKE_REG_MASK_SET(me, STAT, MRK, 1);
+ pke_pc_advance(me, 1);
+ }
+ else if(IS_PKE_CMD(cmd, FLUSHE))
+ {
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ /* check if VBS bit is clear, i.e., VU is idle */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
+ {
+ /* VU idle */
+ /* advance PC */
+ pke_pc_advance(me, 1);
+ }
+ else
+ {
+ /* VU busy */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSH))
+ {
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ /* check if VGW bit is clear, i.e., PATH1 is idle */
+ /* simulator design implies PATH2 is always "idle" */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
+ BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
+ 1 /* PATH2 always idle */)
+ {
+ /* VU idle */
+ /* PATH1 idle */
+ /* PATH2 idle */
+ /* advance PC */
+ pke_pc_advance(me, 1);
+ }
+ else
+ {
+ /* GPUIF busy */
+ /* retry this instruction next clock */
+ }
+ }
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, FLUSHA))
+ {
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ /* check if VGW bit is clear, i.e., PATH1 is idle */
+ /* simulator design implies PATH2 is always "idle" */
+ /* XXX: simulator design implies PATH3 is always "idle" */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
+ BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
+ 1 /* PATH2 always idle */ &&
+ 1 /* PATH3 always idle */)
+ {
+ /* VU idle */
+ /* PATH1 idle */
+ /* PATH2 idle */
+ /* PATH3 idle */
+ /* advance PC */
+ pke_pc_advance(me, 1);
+ }
+ else
+ {
+ /* GPUIF busy */
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, PKEMSCAL))
+ {
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ /* check if VBS bit is clear, i.e., VU is idle */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
+ {
+ /* VU idle */
+ unsigned_4 vu_pc;
+
+ /* perform PKE1-unique processing for microprogram calls */
+ if(me->pke_number == 1)
+ {
+ /* flip DBF */
+ PKE_REG_MASK_SET(me, DBF, DF,
+ PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
+ PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
+ /* compute new TOPS */
+ PKE_REG_MASK_SET(me, TOPS, TOPS,
+ (PKE_REG_MASK_GET(me, BASE, BASE) +
+ (PKE_REG_MASK_GET(me, DBF, DF) *
+ PKE_REG_MASK_GET(me, OFST, OFFSET))));
+ /* compute new ITOP and TOP */
+ PKE_REG_MASK_SET(me, ITOP, ITOP,
+ PKE_REG_MASK_GET(me, ITOPS, ITOPS));
+ PKE_REG_MASK_SET(me, TOP, TOP,
+ PKE_REG_MASK_GET(me, TOPS, TOPS));
+ }
+
+ /* compute new PC */
+ vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
+ /* write new PC; callback function gets VU running */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+ /* advance PC */
+ pke_pc_advance(me, 1);
+ }
+ else
+ {
+ /* VU busy */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, PKEMSCNT))
+ {
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ /* check if VBS bit is clear, i.e., VU is idle */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
+ {
+ /* VU idle */
+ unsigned_4 vu_pc;
+
+ /* flip DBF etc. for PKE1 */
+ if(me->pke_number == 1)
+ {
+ PKE_REG_MASK_SET(me, DBF, DF,
+ PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
+ PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
+ PKE_REG_MASK_SET(me, TOPS, TOPS,
+ (PKE_REG_MASK_GET(me, BASE, BASE) +
+ (PKE_REG_MASK_GET(me, DBF, DF) *
+ PKE_REG_MASK_GET(me, OFST, OFFSET))));
+ PKE_REG_MASK_SET(me, ITOP, ITOP,
+ PKE_REG_MASK_GET(me, ITOPS, ITOPS));
+ PKE_REG_MASK_SET(me, TOP, TOP,
+ PKE_REG_MASK_GET(me, TOPS, TOPS));
+ }
+
+ /* read old PC */
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+ /* rewrite its PC; callback function gets VU running */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+ /* advance PC */
+ pke_pc_advance(me, 1);
+ }
+ else
+ {
+ /* VU busy */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(me->pke_number == 1 && IS_PKE_CMD(cmd, PKEMSCALF))
+ {
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ /* check if VGW bit is clear, i.e., PATH1 is idle */
+ /* simulator design implies PATH2 is always "idle" */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0 &&
+ BIT_MASK_GET(vu_stat, VU_REG_STAT_VGW_B, VU_REG_STAT_VGW_E) == 0 &&
+ 1 /* PATH2 always idle */)
+ {
+ /* VU idle */
+ /* PATH1 idle */
+ /* PATH2 idle */
+ unsigned_4 vu_pc;
+
+ /* flip DBF etc. for PKE1 */
+ if(me->pke_number == 1)
+ {
+ PKE_REG_MASK_SET(me, DBF, DF,
+ PKE_REG_MASK_GET(me, DBF, DF) ? 0 : 1);
+ PKE_REG_MASK_SET(me, STAT, DBF, PKE_REG_MASK_GET(me, DBF, DF));
+ PKE_REG_MASK_SET(me, TOPS, TOPS,
+ (PKE_REG_MASK_GET(me, BASE, BASE) +
+ (PKE_REG_MASK_GET(me, DBF, DF) *
+ PKE_REG_MASK_GET(me, OFST, OFFSET))));
+ PKE_REG_MASK_SET(me, ITOP, ITOP,
+ PKE_REG_MASK_GET(me, ITOPS, ITOPS));
+ PKE_REG_MASK_SET(me, TOP, TOP,
+ PKE_REG_MASK_GET(me, TOPS, TOPS));
+ }
+
+ /* compute new PC */
+ vu_pc = BIT_MASK_GET(imm, 0, 15); /* XXX: all bits significant? */
+ /* write new PC; callback function gets VU running */
+ sim_write(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VU0_PC_START : VU1_PC_START),
+ (void*) & vu_pc,
+ sizeof(unsigned_4));
+ /* advance PC */
+ pke_pc_advance(me, 1);
+ }
+ else
+ {
+ /* VU busy */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, STMASK))
+ {
+ /* check that FIFO has one more word for STMASK operand */
+ unsigned_4* mask;
+
+ mask = pke_pc_operand(me, 1);
+ if(mask != NULL)
+ {
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+ /* fill the register */
+ PKE_REG_MASK_SET(me, MASK, MASK, *mask);
+ /* advance PC */
+ pke_pc_advance(me, 2);
+ }
+ else
+ {
+ /* need to wait for another word */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, STROW))
+ {
+ /* check that FIFO has four more words for STROW operand */
+ unsigned_4* last_op;
+
+ last_op = pke_pc_operand(me, 4);
+ if(last_op != NULL)
+ {
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* copy ROW registers: must all exist if 4th operand exists */
+ me->regs[PKE_REG_R0][0] = * pke_pc_operand(me, 1);
+ me->regs[PKE_REG_R1][0] = * pke_pc_operand(me, 2);
+ me->regs[PKE_REG_R2][0] = * pke_pc_operand(me, 3);
+ me->regs[PKE_REG_R3][0] = * pke_pc_operand(me, 4);
+
+ /* advance PC */
+ pke_pc_advance(me, 5);
+ }
+ else
+ {
+ /* need to wait for another word */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, STCOL))
+ {
+ /* check that FIFO has four more words for STCOL operand */
+ unsigned_4* last_op;
+
+ last_op = pke_pc_operand(me, 4);
+ if(last_op != NULL)
+ {
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* copy COL registers: must all exist if 4th operand exists */
+ me->regs[PKE_REG_C0][0] = * pke_pc_operand(me, 1);
+ me->regs[PKE_REG_C1][0] = * pke_pc_operand(me, 2);
+ me->regs[PKE_REG_C2][0] = * pke_pc_operand(me, 3);
+ me->regs[PKE_REG_C3][0] = * pke_pc_operand(me, 4);
+
+ /* advance PC */
+ pke_pc_advance(me, 5);
+ }
+ else
+ {
+ /* need to wait for another word */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, MPG))
+ {
+ unsigned_4* last_mpg_word;
+
+ /* map zero to max+1 */
+ if(num==0) num=0x100;
+
+ /* check that FIFO has a few more words for MPG operand */
+ last_mpg_word = pke_pc_operand(me, num*2); /* num: number of 64-bit words */
+ if(last_mpg_word != NULL)
+ {
+ /* perform implied FLUSHE */
+ /* read VU status word */
+ unsigned_4 vu_stat;
+ sim_read(NULL,
+ (SIM_ADDR) (me->pke_number == 0 ? VPE0_STAT : VPE1_STAT),
+ (void*) & vu_stat,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+
+ /* check if VBS bit is clear, i.e., VU is idle */
+ if(BIT_MASK_GET(vu_stat, VU_REG_STAT_VBS_B, VU_REG_STAT_VBS_E) == 0)
+ {
+ /* VU idle */
+ int i;
+
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* transfer VU instructions, one word per iteration */
+ for(i=0; i<num*2; i++)
+ {
+ address_word vu_addr_base, vu_addr;
+ address_word vutrack_addr_base, vutrack_addr;
+ struct fifo_quadword* fq = pke_pc_fifo(me, num);
+ unsigned_4* operand = pke_pc_operand(me, num);
+
+ /* imm: in 64-bit units for MPG instruction */
+
+ /* XXX: set NUM */
+
+ /* VU*_MEM0 : instruction memory */
+ vu_addr_base = (me->pke_number == 0) ?
+ VU0_MEM0_WINDOW_START : VU0_MEM0_WINDOW_START;
+ vu_addr = vu_addr_base + (imm*2) + i;
+
+ /* VU*_MEM0_TRACK : source-addr tracking table */
+ vutrack_addr_base = (me->pke_number == 0) ?
+ VU0_MEM0_SRCADDR_START : VU1_MEM0_SRCADDR_START;
+ vutrack_addr = vu_addr_base + (imm*2) + i;
+
+ /* write data into VU memory */
+ pke_track_write(me, operand, sizeof(unsigned_4),
+ vu_addr, fq->source_address);
+
+ /* write srcaddr into VU srcaddr tracking table */
+ sim_write(NULL,
+ (SIM_ADDR) vutrack_addr,
+ (void*) & fq->source_address,
+ sizeof(unsigned_4));
+ /* XXX: check RC */
+ } /* VU xfer loop */
+
+ /* advance PC */
+ pke_pc_advance(me, 1 + num*2);
+ }
+ else
+ {
+ /* VU busy */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ } /* if FIFO full enough */
+ else
+ {
+ /* need to wait for another word */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, DIRECT) || IS_PKE_CMD(cmd, DIRECTHL)) /* treat identically */
+ {
+ /* check that FIFO has a few more words for DIRECT operand */
+ unsigned_4* last_direct_word;
+
+ /* map zero to max+1 */
+ if(imm==0) imm=0x10000;
+
+ last_direct_word = pke_pc_operand(me, imm*4); /* num: number of 128-bit words */
+ if(last_direct_word != NULL)
+ {
+ /* VU idle */
+ int i;
+ quadword fifo_data;
+
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* transfer GPUIF quadwords, one word per iteration */
+ for(i=0; i<imm*4; i++)
+ {
+ struct fifo_quadword* fq = pke_pc_fifo(me, num);
+ unsigned_4* operand = pke_pc_operand(me, num);
+
+ /* collect word into quadword */
+ fifo_data[i%4] = *operand;
+
+ /* write to GPUIF FIFO only with full word */
+ if(i%4 == 3)
+ {
+ address_word gpuif_fifo = GPUIF_PATH2_FIFO_ADDR+(i/4);
+ pke_track_write(me, fifo_data, sizeof(quadword),
+ (SIM_ADDR) gpuif_fifo, fq->source_address);
+ /* XXX: check RC */
+ } /* write collected quadword */
+
+ } /* GPUIF xfer loop */
+
+ /* advance PC */
+ pke_pc_advance(me, 1 + imm*4);
+ } /* if FIFO full enough */
+ else
+ {
+ /* need to wait for another word */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ else if(IS_PKE_CMD(cmd, UNPACK)) /* warning: monster complexity */
+ {
+ short vn = BIT_MASK_GET(cmd, 2, 3);
+ short vl = BIT_MASK_GET(cmd, 0, 1);
+ short vnvl = BIT_MASK_GET(cmd, 0, 3);
+ int m = BIT_MASK_GET(cmd, 4, 4);
+ short cl = PKE_REG_MASK_GET(me, CYCLE, CL);
+ short wl = PKE_REG_MASK_GET(me, CYCLE, WL);
+ int n, num_operands;
+ unsigned_4* last_operand_word;
+
+ /* map zero to max+1 */
+ if(num==0) num=0x100;
+
+ /* compute PKEcode length, as given in CPU2 spec, v2.1 pg. 11 */
+ if(wl <= cl)
+ n = num;
+ else
+ n = cl * (num/wl) + PKE_LIMIT(num % wl, cl);
+ num_operands = (((sizeof(unsigned_4) >> vl) * (vn+1) * n)/sizeof(unsigned_4));
+
+ /* confirm that FIFO has enough words in it */
+ last_operand_word = pke_pc_operand(me, num_operands);
+ if(last_operand_word != NULL)
+ {
+ address_word vu_addr_base;
+ int operand_num, vector_num;
+
+ /* "transferring" operand */
+ PKE_REG_MASK_SET(me, STAT, PPS, PKE_REG_STAT_PPS_XFER);
+
+ /* XXX: don't check whether VU is idle?? */
+
+ if(me->pke_number == 0)
+ vu_addr_base = VU0_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
+ else
+ {
+ vu_addr_base = VU1_MEM1_WINDOW_START + BIT_MASK_GET(imm, 0, 9);
+ if(BIT_MASK_GET(imm, 15, 15)) /* fetch R flag from imm word */
+ vu_addr_base += PKE_REG_MASK_GET(me, TOPS, TOPS);
+ }
+
+ /* XXX: vu_addr overflow check */
+
+ /* transfer given number of vectors */
+ operand_num = 1; /* word index into instruction stream: 1..num_operands */
+ vector_num = 0; /* vector number being processed: 0..num-1 */
+ while(operand_num <= num_operands)
+ {
+ quadword vu_old_data;
+ quadword vu_new_data;
+ quadword unpacked_data;
+ address_word vu_addr;
+ struct fifo_quadword* fq;
+ int i;
+
+ /* XXX: set NUM */
+
+ /* compute VU destination address, as bytes in R5900 memory */
+ if(cl >= wl)
+ {
+ /* map zero to max+1 */
+ if(wl == 0) wl = 0x0100;
+ vu_addr = vu_addr_base + 16*(cl*(vector_num/wl) + (vector_num%wl));
+ }
+ else
+ vu_addr = vu_addr_base + 16*vector_num;
+
+ /* read old VU data word at address */
+ sim_read(NULL, (SIM_ADDR) vu_addr, (void*) & vu_old_data, sizeof(vu_old_data));
+
+ /* Let sourceaddr track the first operand */
+ fq = pke_pc_fifo(me, operand_num);
+
+ /* For cyclic unpack, next operand quadword may come from instruction stream
+ or be zero. */
+ if((cl < wl) && ((vector_num % wl) >= cl)) /* wl != 0, set above */
+ {
+ /* clear operand - used only in a "indeterminate" state */
+ for(i = 0; i < 4; i++)
+ unpacked_data[i] = 0;
+ }
+ else
+ {
+ /* compute unpacked words from instruction stream */
+ switch(vnvl)
+ {
+ case PKE_UNPACK_S_32:
+ case PKE_UNPACK_V2_32:
+ case PKE_UNPACK_V3_32:
+ case PKE_UNPACK_V4_32:
+ /* copy (vn+1) 32-bit values */
+ for(i = 0; i < vn+1; i++)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = *operand;
+ operand_num ++;
+ }
+ break;
+
+ case PKE_UNPACK_S_16:
+ case PKE_UNPACK_V2_16:
+ case PKE_UNPACK_V3_16:
+ case PKE_UNPACK_V4_16:
+ /* copy (vn+1) 16-bit values, packed two-per-word */
+ for(i=0; i<vn+1; i+=2)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 15, 31);
+ unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 16, 31, 31);
+ operand_num ++;
+ }
+ break;
+
+ case PKE_UNPACK_S_8:
+ case PKE_UNPACK_V2_8:
+ case PKE_UNPACK_V3_8:
+ case PKE_UNPACK_V4_8:
+ /* copy (vn+1) 8-bit values, packed four-per-word */
+ for(i=0; i<vn+1; i+=4)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 7, 31);
+ unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 8, 15, 31);
+ unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 16, 23, 31);
+ unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 24, 31, 31);
+ operand_num ++;
+ }
+ break;
+
+ case PKE_UNPACK_V4_5:
+ /* copy four 1/5/5/5-bit values, packed into a sixteen-bit */
+ for(i=0; i<vn+1; i+=4)
+ {
+ unsigned_4* operand = pke_pc_operand(me, operand_num);
+ unpacked_data[i] = BIT_MASK_GET_SX(*operand, 0, 4, 31);
+ unpacked_data[i+1] = BIT_MASK_GET_SX(*operand, 5, 9, 31);
+ unpacked_data[i+2] = BIT_MASK_GET_SX(*operand, 10, 14, 31);
+ unpacked_data[i+3] = BIT_MASK_GET_SX(*operand, 15, 15, 31);
+ /* ignore other 16 bits in operand */
+ operand_num ++;
+ }
+ break;
+
+ default: /* bad UNPACK code */
+ {
+ /* XXX: how to handle? */
+ /* set ER1 flag in STAT register */
+ PKE_REG_MASK_SET(me, STAT, ER1, 1);
+ }
+ }
+ }
+
+ /* compute replacement word - function of vn, vl, mask */
+ if(m) /* use mask register? */
+ {
+ /* compute index into mask register for this word */
+ int mask_index = PKE_LIMIT(vector_num % wl, 3); /* wl != 0, set above */
+
+ for(i=0; i<3; i++) /* loop over columns */
+ {
+ int mask_op = PKE_MASKREG_GET(me, mask_index, i);
+ unsigned_4* masked_value = NULL;
+ unsigned_4 zero = 0;
+
+ switch(mask_op)
+ {
+ case PKE_MASKREG_INPUT:
+ /* for vn == 0, all columns are copied from column 0 */
+ if(vn == 0)
+ masked_value = & unpacked_data[0];
+ else if(i > vn)
+ masked_value = & zero; /* XXX: what to put here? */
+ else
+ masked_value = & unpacked_data[i];
+ break;
+
+ case PKE_MASKREG_ROW: /* exploit R0..R3 contiguity */
+ masked_value = & me->regs[PKE_REG_R0 + i][0];
+ break;
+
+ case PKE_MASKREG_COLUMN: /* exploit C0..C3 contiguity */
+ masked_value = & me->regs[PKE_REG_C0 + PKE_LIMIT(vector_num,3)][0];
+ break;
+
+ case PKE_MASKREG_NOTHING:
+ /* "write inhibit" by re-copying old data */
+ masked_value = & vu_old_data[i];
+ break;
+
+ default:
+ ASSERT(0);
+ /* no other cases possible */
+ }
+
+ /* copy masked value for column */
+ memcpy(& vu_new_data[i], masked_value, sizeof(unsigned_4));
+ } /* loop over columns */
+ }
+ else
+ {
+ /* no mask - just copy over entire unpacked quadword */
+ memcpy(vu_new_data, unpacked_data, sizeof(unpacked_data));
+ }
+
+ /* process STMOD register for accumulation operations */
+ switch(PKE_REG_MASK_GET(me, MODE, MDE))
+ {
+ case PKE_MODE_ADDROW: /* add row registers to output data */
+ for(i=0; i<4; i++)
+ /* exploit R0..R3 contiguity */
+ vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
+ break;
+
+ case PKE_MODE_ACCROW: /* add row registers to output data; accumulate */
+ for(i=0; i<4; i++)
+ {
+ /* exploit R0..R3 contiguity */
+ vu_new_data[i] += me->regs[PKE_REG_R0 + i][0];
+ me->regs[PKE_REG_R0 + i][0] = vu_new_data[i];
+ }
+ break;
+
+ case PKE_MODE_INPUT: /* pass data through */
+ default:
+ ;
+ }
+
+ /* write replacement word */
+ pke_track_write(me, vu_new_data, sizeof(vu_new_data),
+ (SIM_ADDR) vu_addr, fq->source_address);
+
+ /* next vector please */
+ vector_num ++;
+ } /* vector transfer loop */
+ } /* PKE FIFO full enough */
+ else
+ {
+ /* need to wait for another word */
+ next_pps_state = PKE_REG_STAT_PPS_WAIT;
+ /* retry this instruction next clock */
+ }
+ }
+ /* ... */
+ else
+ {
+ /* set ER1 flag in STAT register */
+ PKE_REG_MASK_SET(me, STAT, ER1, 1);
+ /* advance over faulty word */
+ pke_pc_advance(me, 1);
+ }
+
+ /* PKE is now idle or waiting */
+ PKE_REG_MASK_SET(me, STAT, PPS, next_pps_state);
+}
+
+
+
+
+
+
+/* Step the PKE program counter forward by NUM_WORDS 32-bit words.
+   Each complete group of four words is carried out of the word index
+   (qw_pc) into the FIFO quadword index (fifo_pc).  If the PC then
+   equals the number of occupied FIFO quadwords, STAT/FQC is cleared. */
+
+void
+pke_pc_advance(struct pke_device* me, int num_words)
+{
+  int word_index;
+
+  ASSERT(num_words > 0);
+
+  /* carry whole quadwords from the word index into the FIFO index */
+  for(word_index = me->qw_pc + num_words; word_index >= 4; word_index -= 4)
+    me->fifo_pc ++;
+  me->qw_pc = word_index;
+
+  /* clear FQC if FIFO is now empty */
+  if(me->fifo_pc == me->fifo_num_elements)
+    PKE_REG_MASK_SET(me, STAT, FQC, 0);
+}
+
+
+
+/* Return pointer to given operand# in FIFO.  `word_num' starts at 1
+   and counts 32-bit words past the word the PC currently points at.
+   If the FIFO does not hold the quadword containing that operand,
+   return NULL. */
+
+unsigned_4*
+pke_pc_operand(struct pke_device* me, int word_num)
+{
+  int new_qw_pc, new_fifo_pc;
+  unsigned_4* operand;
+
+  ASSERT(word_num > 0);
+
+  /* Start from the current PC.  This is a plain assignment: new_qw_pc
+     has no earlier value to accumulate onto. */
+  new_fifo_pc = me->fifo_pc;
+  new_qw_pc = me->qw_pc + word_num;
+
+  /* handle overflow: carry whole quadwords into the FIFO index */
+  while(new_qw_pc >= 4)
+    {
+      new_qw_pc -= 4;
+      new_fifo_pc ++;
+    }
+
+  /* not enough elements: test the quadword that holds the operand,
+     not the one that holds the current instruction */
+  if(new_fifo_pc >= me->fifo_num_elements)
+    operand = NULL;
+  else
+    operand = & me->fifo[new_fifo_pc].data[new_qw_pc];
+
+  return operand;
+}
+
+
+
+/* Return pointer to FIFO quadword containing given operand# in FIFO.
+   `word_num' starts at 1 and counts 32-bit words past the word the PC
+   currently points at.  If the FIFO does not hold that quadword,
+   return NULL. */
+
+struct fifo_quadword*
+pke_pc_fifo(struct pke_device* me, int word_num)
+{
+  int new_qw_pc, new_fifo_pc;
+  struct fifo_quadword* operand;
+
+  ASSERT(word_num > 0);
+
+  /* Start from the current PC.  This is a plain assignment: new_qw_pc
+     has no earlier value to accumulate onto. */
+  new_fifo_pc = me->fifo_pc;
+  new_qw_pc = me->qw_pc + word_num;
+
+  /* handle overflow: carry whole quadwords into the FIFO index */
+  while(new_qw_pc >= 4)
+    {
+      new_qw_pc -= 4;
+      new_fifo_pc ++;
+    }
+
+  /* not enough elements: test the quadword that holds the operand,
+     not the one that holds the current instruction */
+  if(new_fifo_pc >= me->fifo_num_elements)
+    operand = NULL;
+  else
+    operand = & me->fifo[new_fifo_pc];
+
+  return operand;
}
+
+/* Write LEN bytes from SRC into simulator memory at DEST.  Before the
+   data write, SOURCEADDR is stored into this PKE's source-address
+   tracking word (PKE0_SRCADDR or PKE1_SRCADDR, chosen by
+   me->pke_number); after it, the tracking word is reset to zero.
+   Returns the result of the data sim_write; the results of the two
+   tracking writes are not examined. */
+int
+pke_track_write(struct pke_device* me, const void* src, int len,
+                address_word dest, unsigned_4 sourceaddr)
+{
+  int rc;
+  unsigned_4 no_sourceaddr = 0;
+  /* Pick the tracking word once.  The whole conditional is
+     parenthesized so that the SIM_ADDR conversion applies to the
+     selected address rather than to the comparison. */
+  SIM_ADDR tracking_addr =
+    (SIM_ADDR) ((me->pke_number == 0) ? PKE0_SRCADDR : PKE1_SRCADDR);
+
+  /* write srcaddr into PKE srcaddr tracking */
+  sim_write(NULL,
+            tracking_addr,
+            (void*) & sourceaddr,
+            sizeof(unsigned_4));
+
+  /* write bytes into simulator */
+  rc = sim_write(NULL,
+                 (SIM_ADDR) dest,
+                 (void*) src,
+                 len);
+
+  /* clear srcaddr from PKE srcaddr tracking */
+  sim_write(NULL,
+            tracking_addr,
+            (void*) & no_sourceaddr,
+            sizeof(unsigned_4));
+
+  return rc;
+}
diff --git a/sim/mips/sky-pke.h b/sim/mips/sky-pke.h
index d15b821..e4d0cca 100644
--- a/sim/mips/sky-pke.h
+++ b/sim/mips/sky-pke.h
@@ -7,6 +7,11 @@
#include "sky-device.h"
+/* Debugging PKE? */
+
+#define PKE_DEBUG
+
+
/* External functions */
void pke0_attach(SIM_DESC sd);
@@ -17,7 +22,7 @@ void pke1_issue();
/* Quadword data type */
-typedef unsigned int quadword[4];
+typedef unsigned_4 quadword[4];
/* truncate address to quadword */
#define ADDR_TRUNC_QW(addr) ((addr) & ~0x0f)
@@ -29,8 +34,8 @@ typedef unsigned int quadword[4];
#define PKE0_REGISTER_WINDOW_START 0x10000800
#define PKE1_REGISTER_WINDOW_START 0x10000A00
-#define PKE0_FIFO_START 0x10008000
-#define PKE1_FIFO_START 0x10008010
+#define PKE0_FIFO_ADDR 0x10008000
+#define PKE1_FIFO_ADDR 0x10008010
/* Quadword indices of PKE registers. Actual registers sit at bottom
@@ -51,11 +56,11 @@ typedef unsigned int quadword[4];
#define PKE_REG_ITOP 0x0d
#define PKE_REG_TOP 0x0e /* pke1 only */
#define PKE_REG_DBF 0x0f /* pke1 only */
-#define PKE_REG_R0 0x10
+#define PKE_REG_R0 0x10 /* R0 .. R3 must be contiguous */
#define PKE_REG_R1 0x11
#define PKE_REG_R2 0x12
#define PKE_REG_R3 0x13
-#define PKE_REG_C0 0x14
+#define PKE_REG_C0 0x14 /* C0 .. C3 must be contiguous */
#define PKE_REG_C1 0x15
#define PKE_REG_C2 0x16
#define PKE_REG_C3 0x17
@@ -64,11 +69,273 @@ typedef unsigned int quadword[4];
#define PKE_REGISTER_WINDOW_SIZE (sizeof(quadword) * PKE_NUM_REGS)
+
/* virtual addresses for source-addr tracking */
#define PKE0_SRCADDR 0x20000020
#define PKE1_SRCADDR 0x20000024
+/* PKE commands */
+
+#define PKE_CMD_PKENOP_MASK 0x7F
+#define PKE_CMD_PKENOP_BITS 0x00
+#define PKE_CMD_STCYCL_MASK 0x7F
+#define PKE_CMD_STCYCL_BITS 0x01
+#define PKE_CMD_OFFSET_MASK 0x7F
+#define PKE_CMD_OFFSET_BITS 0x02
+#define PKE_CMD_BASE_MASK 0x7F
+#define PKE_CMD_BASE_BITS 0x03
+#define PKE_CMD_ITOP_MASK 0x7F
+#define PKE_CMD_ITOP_BITS 0x04
+#define PKE_CMD_STMOD_MASK 0x7F
+#define PKE_CMD_STMOD_BITS 0x05
+#define PKE_CMD_MSKPATH3_MASK 0x7F
+#define PKE_CMD_MSKPATH3_BITS 0x06
+#define PKE_CMD_PKEMARK_MASK 0x7F
+#define PKE_CMD_PKEMARK_BITS 0x07
+#define PKE_CMD_FLUSHE_MASK 0x7F
+#define PKE_CMD_FLUSHE_BITS 0x10
+#define PKE_CMD_FLUSH_MASK 0x7F
+#define PKE_CMD_FLUSH_BITS 0x11
+#define PKE_CMD_FLUSHA_MASK 0x7F
+#define PKE_CMD_FLUSHA_BITS 0x13
+#define PKE_CMD_PKEMSCAL_MASK 0x7F /* CAL == "call" */
+#define PKE_CMD_PKEMSCAL_BITS 0x14
+#define PKE_CMD_PKEMSCNT_MASK 0x7F /* CNT == "continue" */
+#define PKE_CMD_PKEMSCNT_BITS 0x17
+#define PKE_CMD_PKEMSCALF_MASK 0x7F /* CALF == "call after flush" */
+#define PKE_CMD_PKEMSCALF_BITS 0x15
+#define PKE_CMD_STMASK_MASK 0x7F
+#define PKE_CMD_STMASK_BITS 0x20
+#define PKE_CMD_STROW_MASK 0x7F
+#define PKE_CMD_STROW_BITS 0x30
+#define PKE_CMD_STCOL_MASK 0x7F
+#define PKE_CMD_STCOL_BITS 0x31
+#define PKE_CMD_MPG_MASK 0x7F
+#define PKE_CMD_MPG_BITS 0x4A
+#define PKE_CMD_DIRECT_MASK 0x7F
+#define PKE_CMD_DIRECT_BITS 0x50
+#define PKE_CMD_DIRECTHL_MASK 0x7F
+#define PKE_CMD_DIRECTHL_BITS 0x51
+#define PKE_CMD_UNPACK_MASK 0x60
+#define PKE_CMD_UNPACK_BITS 0x60
+
+/* test given word for particular PKE command bit pattern */
+#define IS_PKE_CMD(word,cmd) (((word) & PKE_CMD_##cmd##_MASK) == PKE_CMD_##cmd##_BITS)
+
+
+/* register bitmasks: bit numbers for end and beginning of fields */
+
+/* PKE opcode */
+#define PKE_OPCODE_I_E 31
+#define PKE_OPCODE_I_B 31
+#define PKE_OPCODE_CMD_E 30
+#define PKE_OPCODE_CMD_B 24
+#define PKE_OPCODE_NUM_E 23
+#define PKE_OPCODE_NUM_B 16
+#define PKE_OPCODE_IMM_E 15
+#define PKE_OPCODE_IMM_B 0
+
+/* STAT register */
+#define PKE_REG_STAT_FQC_E 28
+#define PKE_REG_STAT_FQC_B 24
+#define PKE_REG_STAT_FDR_E 23
+#define PKE_REG_STAT_FDR_B 23
+#define PKE_REG_STAT_ER1_E 13
+#define PKE_REG_STAT_ER1_B 13
+#define PKE_REG_STAT_ER0_E 12
+#define PKE_REG_STAT_ER0_B 12
+#define PKE_REG_STAT_INT_E 11
+#define PKE_REG_STAT_INT_B 11
+#define PKE_REG_STAT_PIS_E 10
+#define PKE_REG_STAT_PIS_B 10
+#define PKE_REG_STAT_PFS_E 9
+#define PKE_REG_STAT_PFS_B 9
+#define PKE_REG_STAT_PSS_E 8
+#define PKE_REG_STAT_PSS_B 8
+#define PKE_REG_STAT_DBF_E 7
+#define PKE_REG_STAT_DBF_B 7
+#define PKE_REG_STAT_MRK_E 6
+#define PKE_REG_STAT_MRK_B 6
+#define PKE_REG_STAT_PGW_E 3
+#define PKE_REG_STAT_PGW_B 3
+#define PKE_REG_STAT_PEW_E 2
+#define PKE_REG_STAT_PEW_B 2
+#define PKE_REG_STAT_PPS_E 1
+#define PKE_REG_STAT_PPS_B 0
+
+#define PKE_REG_STAT_PPS_IDLE 0x00
+#define PKE_REG_STAT_PPS_WAIT 0x01
+#define PKE_REG_STAT_PPS_DECODE 0x02
+#define PKE_REG_STAT_PPS_XFER 0x03
+
+/* DBF register */
+#define PKE_REG_DBF_DF_E 0
+#define PKE_REG_DBF_DF_B 0
+
+/* OFST register */
+#define PKE_REG_OFST_OFFSET_E 9
+#define PKE_REG_OFST_OFFSET_B 0
+
+/* TOPS register */
+#define PKE_REG_TOPS_TOPS_E 9
+#define PKE_REG_TOPS_TOPS_B 0
+
+/* BASE register */
+#define PKE_REG_BASE_BASE_E 9
+#define PKE_REG_BASE_BASE_B 0
+
+/* ITOPS register */
+#define PKE_REG_ITOPS_ITOPS_E 9
+#define PKE_REG_ITOPS_ITOPS_B 0
+
+/* MODE register */
+#define PKE_REG_MODE_MDE_E 1
+#define PKE_REG_MODE_MDE_B 0
+
+/* MARK register */
+#define PKE_REG_MARK_MARK_E 15
+#define PKE_REG_MARK_MARK_B 0
+
+/* ITOP register */
+#define PKE_REG_ITOP_ITOP_E 9
+#define PKE_REG_ITOP_ITOP_B 0
+
+/* TOP register */
+#define PKE_REG_TOP_TOP_E 9
+#define PKE_REG_TOP_TOP_B 0
+
+/* MASK register */
+#define PKE_REG_MASK_MASK_E 31
+#define PKE_REG_MASK_MASK_B 0
+
+/* CYCLE register */
+#define PKE_REG_CYCLE_WL_E 15
+#define PKE_REG_CYCLE_WL_B 8
+#define PKE_REG_CYCLE_CL_E 7
+#define PKE_REG_CYCLE_CL_B 0
+
+/* ERR register */
+#define PKE_REG_ERR_ME1_E 2
+#define PKE_REG_ERR_ME1_B 2
+#define PKE_REG_ERR_ME0_E 1
+#define PKE_REG_ERR_ME0_B 1
+#define PKE_REG_ERR_MII_E 0
+#define PKE_REG_ERR_MII_B 0
+
+
+/* source-addr for words written to VU/GPUIF ports */
+#define PKE0_SRCADDR 0x20000020 /* from 1998-01-22 e-mail plans */
+#define PKE1_SRCADDR 0x20000024 /* from 1998-01-22 e-mail plans */
+
+
+/* UNPACK opcodes */
+#define PKE_UNPACK(vn,vl) ((vn) << 2 | (vl))
+#define PKE_UNPACK_S_32 PKE_UNPACK(0, 0)
+#define PKE_UNPACK_S_16 PKE_UNPACK(0, 1)
+#define PKE_UNPACK_S_8 PKE_UNPACK(0, 2)
+#define PKE_UNPACK_V2_32 PKE_UNPACK(1, 0)
+#define PKE_UNPACK_V2_16 PKE_UNPACK(1, 1)
+#define PKE_UNPACK_V2_8 PKE_UNPACK(1, 2)
+#define PKE_UNPACK_V3_32 PKE_UNPACK(2, 0)
+#define PKE_UNPACK_V3_16 PKE_UNPACK(2, 1)
+#define PKE_UNPACK_V3_8 PKE_UNPACK(2, 2)
+#define PKE_UNPACK_V4_32 PKE_UNPACK(3, 0)
+#define PKE_UNPACK_V4_16 PKE_UNPACK(3, 1)
+#define PKE_UNPACK_V4_8 PKE_UNPACK(3, 2)
+#define PKE_UNPACK_V4_5 PKE_UNPACK(3, 3)
+
+
+/* MASK register sub-field definitions */
+#define PKE_MASKREG_INPUT 0
+#define PKE_MASKREG_ROW 1
+#define PKE_MASKREG_COLUMN 2
+#define PKE_MASKREG_NOTHING 3
+
+
+/* STMOD register field definitions */
+#define PKE_MODE_INPUT 0
+#define PKE_MODE_ADDROW 1
+#define PKE_MODE_ACCROW 2
+
+
+/* extract a MASK register sub-field for row [0..3] and column [0..3] */
+/* MASK register is laid out of 2-bit values in this r-c order */
+/* m33 m32 m31 m30 m23 m22 m21 m20 m13 m12 m11 m10 m03 m02 m01 m00 */
+#define PKE_MASKREG_GET(me,row,col) \
+((((me)->regs[PKE_REG_MASK][0]) >> (8*(row) + 2*(col))) & 0x03)
+
+
+/* and now a few definitions that rightfully belong elsewhere */
+#ifdef PKE_DEBUG
+
+/* GPUIF addresses */
+#define GPUIF_PATH3_FIFO_ADDR 0x10008020 /* data from CORE */
+#define GPUIF_PATH1_FIFO_ADDR 0x10008030 /* data from VU1 */
+#define GPUIF_PATH2_FIFO_ADDR 0x10008040 /* data from PKE1 */
+
+/* VU STAT register */
+#define VU_REG_STAT_VGW_E 4
+#define VU_REG_STAT_VGW_B 4
+#define VU_REG_STAT_VBS_E 0
+#define VU_REG_STAT_VBS_B 0
+
+/* VU PC pseudo-registers */ /* omitted from 1998-01-22 e-mail plans */
+#define VU0_PC_START 0x20025000
+#define VU1_PC_START 0x20026000
+
+/* VU source-addr tracking tables */ /* changed from 1998-01-22 e-mail plans */
+#define VU0_MEM0_SRCADDR_START 0x21000000
+#define VU0_MEM1_SRCADDR_START 0x21004000
+#define VU1_MEM0_SRCADDR_START 0x21008000
+#define VU1_MEM1_SRCADDR_START 0x2100C000
+
+#endif /* PKE_DEBUG */
+
+
+/* operations */
+/* Bit positions are numbered from 0 at the least significant bit.
+   All of these macros may evaluate their arguments more than once. */
+
+/* unsigned 32-bit mask with bits 0 .. `width' (inclusive) set */
+/* `width' of 31 is special-cased: shifting a 32-bit value by 32 is
+   undefined */
+#define BIT_MASK(width) \
+  ((width) >= 31 ? ((unsigned_4) 0xffffffff) \
+                 : ((((unsigned_4)1) << ((width)+1)) - 1))
+/* e.g.: BIT_MASK(4) = 00011111 */
+
+/* mask of bits `begin' .. `end', both inclusive */
+/* BIT_MASK(begin) >> 1 covers bits 0 .. begin-1, so bit `begin' itself
+   stays in the mask (and begin == 0 clears nothing) */
+#define BIT_MASK_BTW(begin,end) \
+  (BIT_MASK(end) & ~(BIT_MASK(begin) >> 1))
+/* e.g.: BIT_MASK_BTW(4,11) = 0000111111110000 */
+
+/* set bitfield value: store `value' into bits begin..end of lvalue */
+#define BIT_MASK_SET(lvalue,begin,end,value) \
+do { \
+  (lvalue) &= ~BIT_MASK_BTW(begin,end); \
+  (lvalue) |= ((((unsigned_4)(value)) << (begin)) & BIT_MASK_BTW(begin,end)); \
+} while(0)
+
+/* get bitfield value: bits begin..end of rvalue, shifted down to bit 0 */
+#define BIT_MASK_GET(rvalue,begin,end) \
+  (((rvalue) & BIT_MASK_BTW(begin,end)) >> (begin))
+/* e.g., BIT_MASK_GET(0000111100001111, 2, 8) = 0000000001000011 */
+
+/* get bitfield value, sign-extended to given bit number */
+/* after extraction the field's top bit sits at (end - begin), so that is
+   the bit tested and the first bit filled */
+#define BIT_MASK_GET_SX(rvalue,begin,end,sx) \
+  (BIT_MASK_GET(rvalue,begin,end) | \
+   ((BIT_MASK_GET(rvalue,begin,end) & BIT_MASK_BTW((end)-(begin),(end)-(begin))) \
+    ? BIT_MASK_BTW((end)-(begin),sx) : 0))
+/* e.g., BIT_MASK_GET_SX(0000111100001111, 2, 8, 15) = 1111111111000011 */
+
+
+/* These ugly macro hacks allow succinct bitfield accesses */
+/* `reg' and `flag' are bare names, not expressions: they are pasted
+   into PKE_REG_<reg> (used as the index into me->regs) and into
+   PKE_REG_<reg>_<flag>_B / PKE_REG_<reg>_<flag>_E (passed as the
+   begin / end bit numbers).  Only word [0] of the register quadword
+   is accessed. */
+/* set a bitfield in a register by "name" */
+#define PKE_REG_MASK_SET(me,reg,flag,value) \
+ BIT_MASK_SET(((me)->regs[PKE_REG_##reg][0]), \
+ PKE_REG_##reg##_##flag##_B, PKE_REG_##reg##_##flag##_E, \
+ (value))
+
+/* get a bitfield from a register by "name" */
+#define PKE_REG_MASK_GET(me,reg,flag) \
+ BIT_MASK_GET(((me)->regs[PKE_REG_##reg][0]), \
+ PKE_REG_##reg##_##flag##_B, PKE_REG_##reg##_##flag##_E)
+
+
+/* clamp `value' so that it does not exceed `max'; whichever argument is
+   selected is evaluated twice */
+#define PKE_LIMIT(value,max) ((value) > (max) ? (max) : (value))
+
+
/* One row in the FIFO */
struct fifo_quadword
{
@@ -76,6 +343,8 @@ struct fifo_quadword
quadword data;
/* source main memory address (or 0: unknown) */
address_word source_address;
+ /* DMA tag present in lower 64 bits */
+ unsigned_4 dma_tag_present;
};
@@ -89,9 +358,6 @@ struct pke_device
int pke_number;
int flags;
- address_word register_memory_addr;
- address_word fifo_memory_addr;
-
/* quadword registers */
quadword regs[PKE_NUM_REGS];
@@ -100,10 +366,11 @@ struct pke_device
int fifo_num_elements; /* no. of quadwords occupied in FIFO */
int fifo_buffer_size; /* no. of quadwords of space in FIFO */
FILE* fifo_trace_file; /* or 0 for no trace */
+ /* XXX: assumes FIFOs grow indefinitely */
- /* index into FIFO of current instruction */
- int program_counter;
-
+ /* PC */
+ int fifo_pc; /* 0 .. (fifo_num_elements-1): quadword index of next instruction */
+ int qw_pc; /* 0 .. 3: word index of next instruction */
};
@@ -113,5 +380,4 @@ struct pke_device
/* none at present */
-
#endif /* H_PKE_H */