/* rx.c --- opcode semantics for stand-alone RX simulator.

   Copyright (C) 2008-2013 Free Software Foundation, Inc.
   Contributed by Red Hat, Inc.

   This file is part of the GNU simulators.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>

#include "opcode/rx.h"
#include "cpu.h"
#include "mem.h"
#include "syscalls.h"
#include "fpu.h"
#include "err.h"
#include "misc.h"

#ifdef CYCLE_STATS
static const char * id_names[] = {
  "RXO_unknown",
  "RXO_mov",		/* d = s (signed) */
  "RXO_movbi",		/* d = [s,s2] (signed) */
  "RXO_movbir",		/* [s,s2] = d (signed) */
  "RXO_pushm",		/* s..s2 */
  "RXO_popm",		/* s..s2 */
  "RXO_xchg",		/* s <-> d */
  "RXO_stcc",		/* d = s if cond(s2) */
  "RXO_rtsd",		/* rtsd, 1=imm, 2-0 = reg if reg type */

  /* These are all either d OP= s or, if s2 is set, d = s OP s2.  Note
     that d may be "None".  */
  "RXO_and",
  "RXO_or",
  "RXO_xor",
  "RXO_add",
  "RXO_sub",
  "RXO_mul",
  "RXO_div",
  "RXO_divu",
  "RXO_shll",
  "RXO_shar",
  "RXO_shlr",

  "RXO_adc",		/* d = d + s + carry */
  "RXO_sbb",		/* d = d - s - ~carry */
  "RXO_abs",		/* d = |s| */
  "RXO_max",		/* d = max(d,s) */
  "RXO_min",		/* d = min(d,s) */
  "RXO_emul",		/* d:64 = d:32 * s */
  "RXO_emulu",		/* d:64 = d:32 * s (unsigned) */

  "RXO_rolc",		/* d <<= 1 through carry */
  "RXO_rorc",		/* d >>= 1 through carry */
  "RXO_rotl",		/* d <<= #s without carry */
  "RXO_rotr",		/* d >>= #s without carry */
  "RXO_revw",		/* d = revw(s) */
  "RXO_revl",		/* d = revl(s) */

  "RXO_branch",		/* pc = d if cond(s) */
  "RXO_branchrel",	/* pc += d if cond(s) */
  "RXO_jsr",		/* pc = d */
  "RXO_jsrrel",		/* pc += d */
  "RXO_rts",
  "RXO_nop",
  "RXO_nop2",
  "RXO_nop3",

  "RXO_scmpu",
  "RXO_smovu",
  "RXO_smovb",
  "RXO_suntil",
  "RXO_swhile",
  "RXO_smovf",
  "RXO_sstr",

  "RXO_rmpa",
  "RXO_mulhi",
  "RXO_mullo",
  "RXO_machi",
  "RXO_maclo",
  "RXO_mvtachi",
  "RXO_mvtaclo",
  "RXO_mvfachi",
  "RXO_mvfacmi",
  "RXO_mvfaclo",
  "RXO_racw",
  "RXO_sat",		/* sat(d) */
  "RXO_satr",

  "RXO_fadd",		/* d op= s */
  "RXO_fcmp",
  "RXO_fsub",
  "RXO_ftoi",
  "RXO_fmul",
  "RXO_fdiv",
  "RXO_round",
  "RXO_itof",

  "RXO_bset",		/* d |= (1<<s) */
  "RXO_bclr",		/* d &= ~(1<<s) */
  "RXO_btst",		/* s & (1<<s2) */
  "RXO_bnot",		/* d ^= (1<<s) */
  "RXO_bmcc",		/* d<s> = cond(s2) */

  "RXO_clrpsw",		/* flag index in d */
  "RXO_setpsw",		/* flag index in d */
  "RXO_mvtipl",		/* new IPL in s */

  "RXO_rtfi",
  "RXO_rte",
  "RXO_rtd",		/* undocumented */
  "RXO_brk",
  "RXO_dbt",		/* undocumented */
  "RXO_int",		/* vector id in s */
  "RXO_stop",
  "RXO_wait",

  "RXO_sccnd",		/* d = cond(s) ?
1 : 0 */ }; static const char * optype_names[] = { " - ", "#Imm", /* #addend */ " Rn ", /* Rn */ "[Rn]", /* [Rn + addend] */ "Ps++", /* [Rn+] */ "--Pr", /* [-Rn] */ " cc ", /* eq, gtu, etc */ "Flag", /* [UIOSZC] */ "RbRi" /* [Rb + scale * Ri] */ }; #define N_RXO (sizeof(id_names)/sizeof(id_names[0])) #define N_RXT (sizeof(optype_names)/sizeof(optype_names[0])) #define N_MAP 30 static unsigned long long benchmark_start_cycle; static unsigned long long benchmark_end_cycle; static int op_cache[N_RXT][N_RXT][N_RXT]; static int op_cache_rev[N_MAP]; static int op_cache_idx = 0; static int op_lookup (int a, int b, int c) { if (op_cache[a][b][c]) return op_cache[a][b][c]; op_cache_idx ++; if (op_cache_idx >= N_MAP) { printf("op_cache_idx exceeds %d\n", N_MAP); exit(1); } op_cache[a][b][c] = op_cache_idx; op_cache_rev[op_cache_idx] = (a<<8) | (b<<4) | c; return op_cache_idx; } static char * op_cache_string (int map) { static int ci; static char cb[5][20]; int a, b, c; map = op_cache_rev[map]; a = (map >> 8) & 15; b = (map >> 4) & 15; c = (map >> 0) & 15; ci = (ci + 1) % 5; sprintf(cb[ci], "%s %s %s", optype_names[a], optype_names[b], optype_names[c]); return cb[ci]; } static unsigned long long cycles_per_id[N_RXO][N_MAP]; static unsigned long long times_per_id[N_RXO][N_MAP]; static unsigned long long memory_stalls; static unsigned long long register_stalls; static unsigned long long branch_stalls; static unsigned long long branch_alignment_stalls; static unsigned long long fast_returns; static unsigned long times_per_pair[N_RXO][N_MAP][N_RXO][N_MAP]; static int prev_opcode_id = RXO_unknown; static int po0; #define STATS(x) x #else #define STATS(x) #endif /* CYCLE_STATS */ #ifdef CYCLE_ACCURATE static int new_rt = -1; /* Number of cycles to add if an insn spans an 8-byte boundary. */ static int branch_alignment_penalty = 0; #endif static int running_benchmark = 1; #define tprintf if (trace && running_benchmark) printf jmp_buf decode_jmp_buf; unsigned int rx_cycles = 0; #ifdef CYCLE_ACCURATE /* If nonzero, memory was read at some point and cycle latency might take effect. */ static int memory_source = 0; /* If nonzero, memory was written and extra cycles might be needed. */ static int memory_dest = 0; static void cycles (int throughput) { tprintf("%d cycles\n", throughput); regs.cycle_count += throughput; } /* Number of execution (E) cycles the op uses. For memory sources, we include the load micro-op stall as two extra E cycles. */ #define E(c) cycles (memory_source ? c + 2 : c) #define E1 cycles (1) #define E2 cycles (2) #define EBIT cycles (memory_source ? 2 : 1) /* Check to see if a read latency must be applied for a given register. */ #define RL(r) \ if (regs.rt == r ) \ { \ tprintf("register %d load stall\n", r); \ regs.cycle_count ++; \ STATS(register_stalls ++); \ regs.rt = -1; \ } #define RLD(r) \ if (memory_source) \ { \ tprintf ("Rt now %d\n", r); \ new_rt = r; \ } static int lsb_count (unsigned long v, int is_signed) { int i, lsb; if (is_signed && (v & 0x80000000U)) v = (unsigned long)(long)(-v); for (i=31; i>=0; i--) if (v & (1 << i)) { /* v is 0..31, we want 1=1-2, 2=3-4, 3=5-6, etc. 
*/ lsb = (i + 2) / 2; return lsb; } return 0; } static int divu_cycles(unsigned long num, unsigned long den) { int nb = lsb_count (num, 0); int db = lsb_count (den, 0); int rv; if (nb < db) rv = 2; else rv = 3 + nb - db; E (rv); return rv; } static int div_cycles(long num, long den) { int nb = lsb_count ((unsigned long)num, 1); int db = lsb_count ((unsigned long)den, 1); int rv; if (nb < db) rv = 3; else rv = 5 + nb - db; E (rv); return rv; } #else /* !CYCLE_ACCURATE */ #define cycles(t) #define E(c) #define E1 #define E2 #define EBIT #define RL(r) #define RLD(r) #define divu_cycles(n,d) #define div_cycles(n,d) #endif /* else CYCLE_ACCURATE */ static int size2bytes[] = { 4, 1, 1, 1, 2, 2, 2, 3, 4 }; typedef struct { unsigned long dpc; } RX_Data; #define rx_abort() _rx_abort(__FILE__, __LINE__) static void _rx_abort (const char *file, int line) { if (strrchr (file, '/')) file = strrchr (file, '/') + 1; fprintf(stderr, "abort at %s:%d\n", file, line); abort(); } static unsigned char *get_byte_base; static RX_Opcode_Decoded **decode_cache_base; static SI get_byte_page; void reset_decoder (void) { get_byte_base = 0; decode_cache_base = 0; get_byte_page = 0; } static inline void maybe_get_mem_page (SI tpc) { if (((tpc ^ get_byte_page) & NONPAGE_MASK) || enable_counting) { get_byte_page = tpc & NONPAGE_MASK; get_byte_base = rx_mem_ptr (get_byte_page, MPA_READING) - get_byte_page; decode_cache_base = rx_mem_decode_cache (get_byte_page) - get_byte_page; } } /* This gets called a *lot* so optimize it. */ static int rx_get_byte (void *vdata) { RX_Data *rx_data = (RX_Data *)vdata; SI tpc = rx_data->dpc; /* See load.c for an explanation of this. */ if (rx_big_endian) tpc ^= 3; maybe_get_mem_page (tpc); rx_data->dpc ++; return get_byte_base [tpc]; } static int get_op (const RX_Opcode_Decoded *rd, int i) { const RX_Opcode_Operand *o = rd->op + i; int addr, rv = 0; switch (o->type) { case RX_Operand_None: rx_abort (); case RX_Operand_Immediate: /* #addend */ return o->addend; case RX_Operand_Register: /* Rn */ RL (o->reg); rv = get_reg (o->reg); break; case RX_Operand_Predec: /* [-Rn] */ put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]); /* fall through */ case RX_Operand_Postinc: /* [Rn+] */ case RX_Operand_Indirect: /* [Rn + addend] */ case RX_Operand_TwoReg: /* [Rn + scale * R2] */ #ifdef CYCLE_ACCURATE RL (o->reg); if (o->type == RX_Operand_TwoReg) RL (rd->op[2].reg); regs.rt = -1; if (regs.m2m == M2M_BOTH) { tprintf("src memory stall\n"); #ifdef CYCLE_STATS memory_stalls ++; #endif regs.cycle_count ++; regs.m2m = 0; } memory_source = 1; #endif if (o->type == RX_Operand_TwoReg) addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg); else addr = get_reg (o->reg) + o->addend; switch (o->size) { case RX_AnySize: rx_abort (); case RX_Byte: /* undefined extension */ case RX_UByte: case RX_SByte: rv = mem_get_qi (addr); break; case RX_Word: /* undefined extension */ case RX_UWord: case RX_SWord: rv = mem_get_hi (addr); break; case RX_3Byte: rv = mem_get_psi (addr); break; case RX_Long: rv = mem_get_si (addr); break; } if (o->type == RX_Operand_Postinc) put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]); break; case RX_Operand_Condition: /* eq, gtu, etc */ return condition_true (o->reg); case RX_Operand_Flag: /* [UIOSZC] */ return (regs.r_psw & (1 << o->reg)) ? 1 : 0; } /* if we've gotten here, we need to clip/extend the value according to the size. 
*/ switch (o->size) { case RX_AnySize: rx_abort (); case RX_Byte: /* undefined extension */ rv |= 0xdeadbe00; /* keep them honest */ break; case RX_UByte: rv &= 0xff; break; case RX_SByte: rv = sign_ext (rv, 8); break; case RX_Word: /* undefined extension */ rv |= 0xdead0000; /* keep them honest */ break; case RX_UWord: rv &= 0xffff; break; case RX_SWord: rv = sign_ext (rv, 16); break; case RX_3Byte: rv &= 0xffffff; break; case RX_Long: break; } return rv; } static void put_op (const RX_Opcode_Decoded *rd, int i, int v) { const RX_Opcode_Operand *o = rd->op + i; int addr; switch (o->size) { case RX_AnySize: if (o->type != RX_Operand_Register) rx_abort (); break; case RX_Byte: /* undefined extension */ v |= 0xdeadbe00; /* keep them honest */ break; case RX_UByte: v &= 0xff; break; case RX_SByte: v = sign_ext (v, 8); break; case RX_Word: /* undefined extension */ v |= 0xdead0000; /* keep them honest */ break; case RX_UWord: v &= 0xffff; break; case RX_SWord: v = sign_ext (v, 16); break; case RX_3Byte: v &= 0xffffff; break; case RX_Long: break; } switch (o->type) { case RX_Operand_None: /* Opcodes like TST and CMP use this. */ break; case RX_Operand_Immediate: /* #addend */ case RX_Operand_Condition: /* eq, gtu, etc */ rx_abort (); case RX_Operand_Register: /* Rn */ put_reg (o->reg, v); RLD (o->reg); break; case RX_Operand_Predec: /* [-Rn] */ put_reg (o->reg, get_reg (o->reg) - size2bytes[o->size]); /* fall through */ case RX_Operand_Postinc: /* [Rn+] */ case RX_Operand_Indirect: /* [Rn + addend] */ case RX_Operand_TwoReg: /* [Rn + scale * R2] */ #ifdef CYCLE_ACCURATE if (regs.m2m == M2M_BOTH) { tprintf("dst memory stall\n"); regs.cycle_count ++; #ifdef CYCLE_STATS memory_stalls ++; #endif regs.m2m = 0; } memory_dest = 1; #endif if (o->type == RX_Operand_TwoReg) addr = get_reg (o->reg) * size2bytes[rd->size] + get_reg (rd->op[2].reg); else addr = get_reg (o->reg) + o->addend; switch (o->size) { case RX_AnySize: rx_abort (); case RX_Byte: /* undefined extension */ case RX_UByte: case RX_SByte: mem_put_qi (addr, v); break; case RX_Word: /* undefined extension */ case RX_UWord: case RX_SWord: mem_put_hi (addr, v); break; case RX_3Byte: mem_put_psi (addr, v); break; case RX_Long: mem_put_si (addr, v); break; } if (o->type == RX_Operand_Postinc) put_reg (o->reg, get_reg (o->reg) + size2bytes[o->size]); break; case RX_Operand_Flag: /* [UIOSZC] */ if (v) regs.r_psw |= (1 << o->reg); else regs.r_psw &= ~(1 << o->reg); break; } } #define PD(x) put_op (opcode, 0, x) #define PS(x) put_op (opcode, 1, x) #define PS2(x) put_op (opcode, 2, x) #define GD() get_op (opcode, 0) #define GS() get_op (opcode, 1) #define GS2() get_op (opcode, 2) #define DSZ() size2bytes[opcode->op[0].size] #define SSZ() size2bytes[opcode->op[0].size] #define S2SZ() size2bytes[opcode->op[0].size] /* "Universal" sources. */ #define US1() ((opcode->op[2].type == RX_Operand_None) ? GD() : GS()) #define US2() ((opcode->op[2].type == RX_Operand_None) ? GS() : GS2()) static void push(int val) { int rsp = get_reg (sp); rsp -= 4; put_reg (sp, rsp); mem_put_si (rsp, val); } /* Just like the above, but tag the memory as "pushed pc" so if anyone tries to write to it, it will cause an error. 
*/ static void pushpc(int val) { int rsp = get_reg (sp); rsp -= 4; put_reg (sp, rsp); mem_put_si (rsp, val); mem_set_content_range (rsp, rsp+3, MC_PUSHED_PC); } static int pop() { int rv; int rsp = get_reg (sp); rv = mem_get_si (rsp); rsp += 4; put_reg (sp, rsp); return rv; } static int poppc() { int rv; int rsp = get_reg (sp); if (mem_get_content_type (rsp) != MC_PUSHED_PC) execution_error (SIM_ERR_CORRUPT_STACK, rsp); rv = mem_get_si (rsp); mem_set_content_range (rsp, rsp+3, MC_UNINIT); rsp += 4; put_reg (sp, rsp); return rv; } #define MATH_OP(vop,c) \ { \ umb = US2(); \ uma = US1(); \ ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \ tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \ ma = sign_ext (uma, DSZ() * 8); \ mb = sign_ext (umb, DSZ() * 8); \ sll = (long long) ma vop (long long) mb vop c; \ tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \ set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \ PD (sll); \ E (1); \ } #define LOGIC_OP(vop) \ { \ mb = US2(); \ ma = US1(); \ v = ma vop mb; \ tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \ set_sz (v, DSZ()); \ PD(v); \ E (1); \ } #define SHIFT_OP(val, type, count, OP, carry_mask) \ { \ int i, c=0; \ count = US2(); \ val = (type)US1(); \ tprintf("%lld " #OP " %d\n", val, count); \ for (i = 0; i < count; i ++) \ { \ c = val & carry_mask; \ val OP 1; \ } \ set_oszc (val, 4, c); \ PD (val); \ } typedef union { int i; float f; } FloatInt; static inline int float2int (float f) { FloatInt fi; fi.f = f; return fi.i; } static inline float int2float (int i) { FloatInt fi; fi.i = i; return fi.f; } static int fop_fadd (fp_t s1, fp_t s2, fp_t *d) { *d = rxfp_add (s1, s2); return 1; } static int fop_fmul (fp_t s1, fp_t s2, fp_t *d) { *d = rxfp_mul (s1, s2); return 1; } static int fop_fdiv (fp_t s1, fp_t s2, fp_t *d) { *d = rxfp_div (s1, s2); return 1; } static int fop_fsub (fp_t s1, fp_t s2, fp_t *d) { *d = rxfp_sub (s1, s2); return 1; } #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH)))) #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR #define FPCHECK() \ if (FPPENDING()) \ return do_fp_exception (opcode_pc) #define FLOAT_OP(func) \ { \ int do_store; \ fp_t fa, fb, fc; \ FPCLEAR(); \ fb = GS (); \ fa = GD (); \ do_store = fop_##func (fa, fb, &fc); \ tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \ FPCHECK(); \ if (do_store) \ PD (fc); \ mb = 0; \ if ((fc & 0x80000000UL) != 0) \ mb |= FLAGBIT_S; \ if ((fc & 0x7fffffffUL) == 0) \ mb |= FLAGBIT_Z; \ set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \ } #define carry (FLAG_C ? 
1 : 0) static struct { unsigned long vaddr; const char *str; int signal; } exception_info[] = { { 0xFFFFFFD0UL, "priviledged opcode", SIGILL }, { 0xFFFFFFD4UL, "access violation", SIGSEGV }, { 0xFFFFFFDCUL, "undefined opcode", SIGILL }, { 0xFFFFFFE4UL, "floating point", SIGFPE } }; #define EX_PRIVILEDGED 0 #define EX_ACCESS 1 #define EX_UNDEFINED 2 #define EX_FLOATING 3 #define EXCEPTION(n) \ return generate_exception (n, opcode_pc) #define PRIVILEDGED() \ if (FLAG_PM) \ EXCEPTION (EX_PRIVILEDGED) static int generate_exception (unsigned long type, SI opcode_pc) { SI old_psw, old_pc, new_pc; new_pc = mem_get_si (exception_info[type].vaddr); /* 0x00020000 is the value used to initialise the known exception vectors (see rx.ld), but it is a reserved area of memory so do not try to access it, and if the value has not been changed by the program then the vector has not been installed. */ if (new_pc == 0 || new_pc == 0x00020000) { if (rx_in_gdb) return RX_MAKE_STOPPED (exception_info[type].signal); fprintf(stderr, "Unhandled %s exception at pc = %#lx\n", exception_info[type].str, (unsigned long) opcode_pc); if (type == EX_FLOATING) { int mask = FPPENDING (); fprintf (stderr, "Pending FP exceptions:"); if (mask & FPSWBITS_FV) fprintf(stderr, " Invalid"); if (mask & FPSWBITS_FO) fprintf(stderr, " Overflow"); if (mask & FPSWBITS_FZ) fprintf(stderr, " Division-by-zero"); if (mask & FPSWBITS_FU) fprintf(stderr, " Underflow"); if (mask & FPSWBITS_FX) fprintf(stderr, " Inexact"); if (mask & FPSWBITS_CE) fprintf(stderr, " Unimplemented"); fprintf(stderr, "\n"); } return RX_MAKE_EXITED (1); } tprintf ("Triggering %s exception\n", exception_info[type].str); old_psw = regs.r_psw; regs.r_psw &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM); old_pc = opcode_pc; regs.r_pc = new_pc; pushpc (old_psw); pushpc (old_pc); return RX_MAKE_STEPPED (); } void generate_access_exception (void) { int rv; rv = generate_exception (EX_ACCESS, regs.r_pc); if (RX_EXITED (rv)) longjmp (decode_jmp_buf, rv); } static int do_fp_exception (unsigned long opcode_pc) { while (FPPENDING()) EXCEPTION (EX_FLOATING); return RX_MAKE_STEPPED (); } static int op_is_memory (const RX_Opcode_Decoded *rd, int i) { switch (rd->op[i].type) { case RX_Operand_Predec: case RX_Operand_Postinc: case RX_Operand_Indirect: return 1; default: return 0; } } #define OM(i) op_is_memory (opcode, i) #define DO_RETURN(x) { longjmp (decode_jmp_buf, x); } int decode_opcode () { unsigned int uma=0, umb=0; int ma=0, mb=0; int opcode_size, v; unsigned long long ll; long long sll; unsigned long opcode_pc; RX_Data rx_data; const RX_Opcode_Decoded *opcode; #ifdef CYCLE_STATS unsigned long long prev_cycle_count; #endif #ifdef CYCLE_ACCURATE unsigned int tx; #endif #ifdef CYCLE_STATS prev_cycle_count = regs.cycle_count; #endif #ifdef CYCLE_ACCURATE memory_source = 0; memory_dest = 0; #endif rx_cycles ++; maybe_get_mem_page (regs.r_pc); opcode_pc = regs.r_pc; /* Note that we don't word-swap this point, there's no point. 
*/ if (decode_cache_base[opcode_pc] == NULL) { RX_Opcode_Decoded *opcode_w; rx_data.dpc = opcode_pc; opcode_w = decode_cache_base[opcode_pc] = calloc (1, sizeof (RX_Opcode_Decoded)); opcode_size = rx_decode_opcode (opcode_pc, opcode_w, rx_get_byte, &rx_data); opcode = opcode_w; } else { opcode = decode_cache_base[opcode_pc]; opcode_size = opcode->n_bytes; } #ifdef CYCLE_ACCURATE if (branch_alignment_penalty) { if ((regs.r_pc ^ (regs.r_pc + opcode_size - 1)) & ~7) { tprintf("1 cycle branch alignment penalty\n"); cycles (branch_alignment_penalty); #ifdef CYCLE_STATS branch_alignment_stalls ++; #endif } branch_alignment_penalty = 0; } #endif regs.r_pc += opcode_size; rx_flagmask = opcode->flags_s; rx_flagand = ~(int)opcode->flags_0; rx_flagor = opcode->flags_1; switch (opcode->id) { case RXO_abs: sll = GS (); tprintf("|%lld| = ", sll); if (sll < 0) sll = -sll; tprintf("%lld\n", sll); PD (sll); set_osz (sll, 4); E (1); break; case RXO_adc: MATH_OP (+,carry); break; case RXO_add: MATH_OP (+,0); break; case RXO_and: LOGIC_OP (&); break; case RXO_bclr: ma = GD (); mb = GS (); if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; ma &= ~(1 << mb); PD (ma); EBIT; break; case RXO_bmcc: ma = GD (); mb = GS (); if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; if (GS2 ()) ma |= (1 << mb); else ma &= ~(1 << mb); PD (ma); EBIT; break; case RXO_bnot: ma = GD (); mb = GS (); if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; ma ^= (1 << mb); PD (ma); EBIT; break; case RXO_branch: if (opcode->op[1].type == RX_Operand_None || GS()) { #ifdef CYCLE_ACCURATE SI old_pc = regs.r_pc; int delta; #endif regs.r_pc = GD(); #ifdef CYCLE_ACCURATE delta = regs.r_pc - old_pc; if (delta >= 0 && delta < 16 && opcode_size > 1) { tprintf("near forward branch bonus\n"); cycles (2); } else { cycles (3); branch_alignment_penalty = 1; } #ifdef CYCLE_STATS branch_stalls ++; #endif #endif } #ifdef CYCLE_ACCURATE else cycles (1); #endif break; case RXO_branchrel: if (opcode->op[1].type == RX_Operand_None || GS()) { int delta = GD(); regs.r_pc = opcode_pc + delta; #ifdef CYCLE_ACCURATE /* Note: specs say 3, chip says 2. */ if (delta >= 0 && delta < 16 && opcode_size > 1) { tprintf("near forward branch bonus\n"); cycles (2); } else { cycles (3); branch_alignment_penalty = 1; } #ifdef CYCLE_STATS branch_stalls ++; #endif #endif } #ifdef CYCLE_ACCURATE else cycles (1); #endif break; case RXO_brk: { int old_psw = regs.r_psw; if (rx_in_gdb) DO_RETURN (RX_MAKE_HIT_BREAK ()); if (regs.r_intb == 0) { tprintf("BREAK hit, no vector table.\n"); DO_RETURN (RX_MAKE_EXITED(1)); } regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM); pushpc (old_psw); pushpc (regs.r_pc); regs.r_pc = mem_get_si (regs.r_intb); cycles(6); } break; case RXO_bset: ma = GD (); mb = GS (); if (opcode->op[0].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; ma |= (1 << mb); PD (ma); EBIT; break; case RXO_btst: ma = GS (); mb = GS2 (); if (opcode->op[1].type == RX_Operand_Register) mb &= 0x1f; else mb &= 0x07; umb = ma & (1 << mb); set_zc (! 
umb, umb); EBIT; break; case RXO_clrpsw: v = 1 << opcode->op[0].reg; if (FLAG_PM && (v == FLAGBIT_I || v == FLAGBIT_U)) break; regs.r_psw &= ~v; cycles (1); break; case RXO_div: /* d = d / s */ ma = GS(); mb = GD(); tprintf("%d / %d = ", mb, ma); if (ma == 0 || (ma == -1 && (unsigned int) mb == 0x80000000)) { tprintf("#NAN\n"); set_flags (FLAGBIT_O, FLAGBIT_O); cycles (3); } else { v = mb/ma; tprintf("%d\n", v); set_flags (FLAGBIT_O, 0); PD (v); div_cycles (mb, ma); } break; case RXO_divu: /* d = d / s */ uma = GS(); umb = GD(); tprintf("%u / %u = ", umb, uma); if (uma == 0) { tprintf("#NAN\n"); set_flags (FLAGBIT_O, FLAGBIT_O); cycles (2); } else { v = umb / uma; tprintf("%u\n", v); set_flags (FLAGBIT_O, 0); PD (v); divu_cycles (umb, uma); } break; case RXO_emul: ma = GD (); mb = GS (); sll = (long long)ma * (long long)mb; tprintf("%d * %d = %lld\n", ma, mb, sll); put_reg (opcode->op[0].reg, sll); put_reg (opcode->op[0].reg + 1, sll >> 32); E2; break; case RXO_emulu: uma = GD (); umb = GS (); ll = (long long)uma * (long long)umb; tprintf("%#x * %#x = %#llx\n", uma, umb, ll); put_reg (opcode->op[0].reg, ll); put_reg (opcode->op[0].reg + 1, ll >> 32); E2; break; case RXO_fadd: FLOAT_OP (fadd); E (4); break; case RXO_fcmp: ma = GD(); mb = GS(); FPCLEAR (); rxfp_cmp (ma, mb); FPCHECK (); E (1); break; case RXO_fdiv: FLOAT_OP (fdiv); E (16); break; case RXO_fmul: FLOAT_OP (fmul); E (3); break; case RXO_rtfi: PRIVILEDGED (); regs.r_psw = regs.r_bpsw; regs.r_pc = regs.r_bpc; #ifdef CYCLE_ACCURATE regs.fast_return = 0; cycles(3); #endif break; case RXO_fsub: FLOAT_OP (fsub); E (4); break; case RXO_ftoi: ma = GS (); FPCLEAR (); mb = rxfp_ftoi (ma, FPRM_ZERO); FPCHECK (); PD (mb); tprintf("(int) %g = %d\n", int2float(ma), mb); set_sz (mb, 4); E (2); break; case RXO_int: v = GS (); if (v == 255) { int rc = rx_syscall (regs.r[5]); if (! RX_STEPPED (rc)) DO_RETURN (rc); } else { int old_psw = regs.r_psw; regs.r_psw &= ~(FLAGBIT_I | FLAGBIT_U | FLAGBIT_PM); pushpc (old_psw); pushpc (regs.r_pc); regs.r_pc = mem_get_si (regs.r_intb + 4 * v); } cycles (6); break; case RXO_itof: ma = GS (); FPCLEAR (); mb = rxfp_itof (ma, regs.r_fpsw); FPCHECK (); tprintf("(float) %d = %x\n", ma, mb); PD (mb); set_sz (ma, 4); E (2); break; case RXO_jsr: case RXO_jsrrel: { #ifdef CYCLE_ACCURATE int delta; regs.m2m = 0; #endif v = GD (); #ifdef CYCLE_ACCURATE regs.link_register = regs.r_pc; #endif pushpc (get_reg (pc)); if (opcode->id == RXO_jsrrel) v += regs.r_pc; #ifdef CYCLE_ACCURATE delta = v - regs.r_pc; #endif put_reg (pc, v); #ifdef CYCLE_ACCURATE /* Note: docs say 3, chip says 2 */ if (delta >= 0 && delta < 16) { tprintf ("near forward jsr bonus\n"); cycles (2); } else { branch_alignment_penalty = 1; cycles (3); } regs.fast_return = 1; #endif } break; case RXO_machi: ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16); ll <<= 16; put_reg64 (acc64, ll + regs.r_acc); E1; break; case RXO_maclo: ll = (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ()); ll <<= 16; put_reg64 (acc64, ll + regs.r_acc); E1; break; case RXO_max: mb = GS(); ma = GD(); if (ma > mb) PD (ma); else PD (mb); E (1); break; case RXO_min: mb = GS(); ma = GD(); if (ma < mb) PD (ma); else PD (mb); E (1); break; case RXO_mov: v = GS (); if (opcode->op[1].type == RX_Operand_Register && opcode->op[1].reg == 17 /* PC */) { /* Special case. We want the address of the insn, not the address of the next insn. 
*/ v = opcode_pc; } if (opcode->op[0].type == RX_Operand_Register && opcode->op[0].reg == 16 /* PSW */) { /* Special case, LDC and POPC can't ever modify PM. */ int pm = regs.r_psw & FLAGBIT_PM; v &= ~ FLAGBIT_PM; v |= pm; if (pm) { v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL); v |= pm; } } if (FLAG_PM) { /* various things can't be changed in user mode. */ if (opcode->op[0].type == RX_Operand_Register) if (opcode->op[0].reg == 32) { v &= ~ (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL); v |= regs.r_psw & (FLAGBIT_I | FLAGBIT_U | FLAGBITS_IPL); } if (opcode->op[0].reg == 34 /* ISP */ || opcode->op[0].reg == 37 /* BPSW */ || opcode->op[0].reg == 39 /* INTB */ || opcode->op[0].reg == 38 /* VCT */) /* These are ignored. */ break; } if (OM(0) && OM(1)) cycles (2); else cycles (1); PD (v); #ifdef CYCLE_ACCURATE if ((opcode->op[0].type == RX_Operand_Predec && opcode->op[1].type == RX_Operand_Register) || (opcode->op[0].type == RX_Operand_Postinc && opcode->op[1].type == RX_Operand_Register)) { /* Special case: push reg doesn't cause a memory stall. */ memory_dest = 0; tprintf("push special case\n"); } #endif set_sz (v, DSZ()); break; case RXO_movbi: PD (GS ()); cycles (1); break; case RXO_movbir: PS (GD ()); cycles (1); break; case RXO_mul: v = US2 (); ll = (unsigned long long) US1() * (unsigned long long) v; PD(ll); E (1); break; case RXO_mulhi: v = GS2 (); ll = (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v >> 16); ll <<= 16; put_reg64 (acc64, ll); E1; break; case RXO_mullo: v = GS2 (); ll = (long long)(signed short)(GS()) * (long long)(signed short)(v); ll <<= 16; put_reg64 (acc64, ll); E1; break; case RXO_mvfachi: PD (get_reg (acchi)); E1; break; case RXO_mvfaclo: PD (get_reg (acclo)); E1; break; case RXO_mvfacmi: PD (get_reg (accmi)); E1; break; case RXO_mvtachi: put_reg (acchi, GS ()); E1; break; case RXO_mvtaclo: put_reg (acclo, GS ()); E1; break; case RXO_mvtipl: regs.r_psw &= ~ FLAGBITS_IPL; regs.r_psw |= (GS () << FLAGSHIFT_IPL) & FLAGBITS_IPL; E1; break; case RXO_nop: case RXO_nop2: case RXO_nop3: E1; break; case RXO_or: LOGIC_OP (|); break; case RXO_popm: /* POPM cannot pop R0 (sp). */ if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0) EXCEPTION (EX_UNDEFINED); if (opcode->op[1].reg >= opcode->op[2].reg) { regs.r_pc = opcode_pc; DO_RETURN (RX_MAKE_STOPPED (SIGILL)); } for (v = opcode->op[1].reg; v <= opcode->op[2].reg; v++) { cycles (1); RLD (v); put_reg (v, pop ()); } break; case RXO_pushm: /* PUSHM cannot push R0 (sp). 
*/ if (opcode->op[1].reg == 0 || opcode->op[2].reg == 0) EXCEPTION (EX_UNDEFINED); if (opcode->op[1].reg >= opcode->op[2].reg) { regs.r_pc = opcode_pc; return RX_MAKE_STOPPED (SIGILL); } for (v = opcode->op[2].reg; v >= opcode->op[1].reg; v--) { RL (v); push (get_reg (v)); } cycles (opcode->op[2].reg - opcode->op[1].reg + 1); break; case RXO_racw: ll = get_reg64 (acc64) << GS (); ll += 0x80000000ULL; if ((signed long long)ll > (signed long long)0x00007fff00000000ULL) ll = 0x00007fff00000000ULL; else if ((signed long long)ll < (signed long long)0xffff800000000000ULL) ll = 0xffff800000000000ULL; else ll &= 0xffffffff00000000ULL; put_reg64 (acc64, ll); E1; break; case RXO_rte: PRIVILEDGED (); regs.r_pc = poppc (); regs.r_psw = poppc (); if (FLAG_PM) regs.r_psw |= FLAGBIT_U; #ifdef CYCLE_ACCURATE regs.fast_return = 0; cycles (6); #endif break; case RXO_revl: uma = GS (); umb = (((uma >> 24) & 0xff) | ((uma >> 8) & 0xff00) | ((uma << 8) & 0xff0000) | ((uma << 24) & 0xff000000UL)); PD (umb); E1; break; case RXO_revw: uma = GS (); umb = (((uma >> 8) & 0x00ff00ff) | ((uma << 8) & 0xff00ff00UL)); PD (umb); E1; break; case RXO_rmpa: RL(4); RL(5); #ifdef CYCLE_ACCURATE tx = regs.r[3]; #endif while (regs.r[3] != 0) { long long tmp; switch (opcode->size) { case RX_Long: ma = mem_get_si (regs.r[1]); mb = mem_get_si (regs.r[2]); regs.r[1] += 4; regs.r[2] += 4; break; case RX_Word: ma = sign_ext (mem_get_hi (regs.r[1]), 16); mb = sign_ext (mem_get_hi (regs.r[2]), 16); regs.r[1] += 2; regs.r[2] += 2; break; case RX_Byte: ma = sign_ext (mem_get_qi (regs.r[1]), 8); mb = sign_ext (mem_get_qi (regs.r[2]), 8); regs.r[1] += 1; regs.r[2] += 1; break; default: abort (); } /* We do the multiply as a signed value. */ sll = (long long)ma * (long long)mb; tprintf(" %016llx = %d * %d\n", sll, ma, mb); /* but we do the sum as unsigned, while sign extending the operands. */ tmp = regs.r[4] + (sll & 0xffffffffUL); regs.r[4] = tmp & 0xffffffffUL; tmp >>= 32; sll >>= 32; tmp += regs.r[5] + (sll & 0xffffffffUL); regs.r[5] = tmp & 0xffffffffUL; tmp >>= 32; sll >>= 32; tmp += regs.r[6] + (sll & 0xffffffffUL); regs.r[6] = tmp & 0xffffffffUL; tprintf("%08lx\033[36m%08lx\033[0m%08lx\n", (unsigned long) regs.r[6], (unsigned long) regs.r[5], (unsigned long) regs.r[4]); regs.r[3] --; } if (regs.r[6] & 0x00008000) regs.r[6] |= 0xffff0000UL; else regs.r[6] &= 0x0000ffff; ma = (regs.r[6] & 0x80000000UL) ? FLAGBIT_S : 0; if (regs.r[6] != 0 && regs.r[6] != 0xffffffffUL) set_flags (FLAGBIT_O|FLAGBIT_S, ma | FLAGBIT_O); else set_flags (FLAGBIT_O|FLAGBIT_S, ma); #ifdef CYCLE_ACCURATE switch (opcode->size) { case RX_Long: cycles (6 + 4 * tx); break; case RX_Word: cycles (6 + 5 * (tx / 2) + 4 * (tx % 2)); break; case RX_Byte: cycles (6 + 7 * (tx / 4) + 4 * (tx % 4)); break; default: abort (); } #endif break; case RXO_rolc: v = GD (); ma = v & 0x80000000UL; v <<= 1; v |= carry; set_szc (v, 4, ma); PD (v); E1; break; case RXO_rorc: uma = GD (); mb = uma & 1; uma >>= 1; uma |= (carry ? 
0x80000000UL : 0); set_szc (uma, 4, mb); PD (uma); E1; break; case RXO_rotl: mb = GS (); uma = GD (); if (mb) { uma = (uma << mb) | (uma >> (32-mb)); mb = uma & 1; } set_szc (uma, 4, mb); PD (uma); E1; break; case RXO_rotr: mb = GS (); uma = GD (); if (mb) { uma = (uma >> mb) | (uma << (32-mb)); mb = uma & 0x80000000; } set_szc (uma, 4, mb); PD (uma); E1; break; case RXO_round: ma = GS (); FPCLEAR (); mb = rxfp_ftoi (ma, regs.r_fpsw); FPCHECK (); PD (mb); tprintf("(int) %g = %d\n", int2float(ma), mb); set_sz (mb, 4); E (2); break; case RXO_rts: { #ifdef CYCLE_ACCURATE int cyc = 5; #endif regs.r_pc = poppc (); #ifdef CYCLE_ACCURATE /* Note: specs say 5, chip says 3. */ if (regs.fast_return && regs.link_register == regs.r_pc) { #ifdef CYCLE_STATS fast_returns ++; #endif tprintf("fast return bonus\n"); cyc -= 2; } cycles (cyc); regs.fast_return = 0; branch_alignment_penalty = 1; #endif } break; case RXO_rtsd: if (opcode->op[2].type == RX_Operand_Register) { int i; /* RTSD cannot pop R0 (sp). */ put_reg (0, get_reg (0) + GS() - (opcode->op[0].reg-opcode->op[2].reg+1)*4); if (opcode->op[2].reg == 0) EXCEPTION (EX_UNDEFINED); #ifdef CYCLE_ACCURATE tx = opcode->op[0].reg - opcode->op[2].reg + 1; #endif for (i = opcode->op[2].reg; i <= opcode->op[0].reg; i ++) { RLD (i); put_reg (i, pop ()); } } else { #ifdef CYCLE_ACCURATE tx = 0; #endif put_reg (0, get_reg (0) + GS()); } put_reg (pc, poppc()); #ifdef CYCLE_ACCURATE if (regs.fast_return && regs.link_register == regs.r_pc) { tprintf("fast return bonus\n"); #ifdef CYCLE_STATS fast_returns ++; #endif cycles (tx < 3 ? 3 : tx + 1); } else { cycles (tx < 5 ? 5 : tx + 1); } regs.fast_return = 0; branch_alignment_penalty = 1; #endif break; case RXO_sat: if (FLAG_O && FLAG_S) PD (0x7fffffffUL); else if (FLAG_O && ! FLAG_S) PD (0x80000000UL); E1; break; case RXO_satr: if (FLAG_O && ! FLAG_S) { put_reg (6, 0x0); put_reg (5, 0x7fffffff); put_reg (4, 0xffffffff); } else if (FLAG_O && FLAG_S) { put_reg (6, 0xffffffff); put_reg (5, 0x80000000); put_reg (4, 0x0); } E1; break; case RXO_sbb: MATH_OP (-, ! 
carry); break; case RXO_sccnd: if (GS()) PD (1); else PD (0); E1; break; case RXO_scmpu: #ifdef CYCLE_ACCURATE tx = regs.r[3]; #endif while (regs.r[3] != 0) { uma = mem_get_qi (regs.r[1] ++); umb = mem_get_qi (regs.r[2] ++); regs.r[3] --; if (uma != umb || uma == 0) break; } if (uma == umb) set_zc (1, 1); else set_zc (0, ((int)uma - (int)umb) >= 0); cycles (2 + 4 * (tx / 4) + 4 * (tx % 4)); break; case RXO_setpsw: v = 1 << opcode->op[0].reg; if (FLAG_PM && (v == FLAGBIT_I || v == FLAGBIT_U)) break; regs.r_psw |= v; cycles (1); break; case RXO_smovb: RL (3); #ifdef CYCLE_ACCURATE tx = regs.r[3]; #endif while (regs.r[3]) { uma = mem_get_qi (regs.r[2] --); mem_put_qi (regs.r[1]--, uma); regs.r[3] --; } #ifdef CYCLE_ACCURATE if (tx > 3) cycles (6 + 3 * (tx / 4) + 3 * (tx % 4)); else cycles (2 + 3 * (tx % 4)); #endif break; case RXO_smovf: RL (3); #ifdef CYCLE_ACCURATE tx = regs.r[3]; #endif while (regs.r[3]) { uma = mem_get_qi (regs.r[2] ++); mem_put_qi (regs.r[1]++, uma); regs.r[3] --; } cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4)); break; case RXO_smovu: #ifdef CYCLE_ACCURATE tx = regs.r[3]; #endif while (regs.r[3] != 0) { uma = mem_get_qi (regs.r[2] ++); mem_put_qi (regs.r[1]++, uma); regs.r[3] --; if (uma == 0) break; } cycles (2 + 3 * (int)(tx / 4) + 3 * (tx % 4)); break; case RXO_shar: /* d = ma >> mb */ SHIFT_OP (sll, int, mb, >>=, 1); E (1); break; case RXO_shll: /* d = ma << mb */ SHIFT_OP (ll, int, mb, <<=, 0x80000000UL); E (1); break; case RXO_shlr: /* d = ma >> mb */ SHIFT_OP (ll, unsigned int, mb, >>=, 1); E (1); break; case RXO_sstr: RL (3); #ifdef CYCLE_ACCURATE tx = regs.r[3]; #endif switch (opcode->size) { case RX_Long: while (regs.r[3] != 0) { mem_put_si (regs.r[1], regs.r[2]); regs.r[1] += 4; regs.r[3] --; } cycles (2 + tx); break; case RX_Word: while (regs.r[3] != 0) { mem_put_hi (regs.r[1], regs.r[2]); regs.r[1] += 2; regs.r[3] --; } cycles (2 + (int)(tx / 2) + tx % 2); break; case RX_Byte: while (regs.r[3] != 0) { mem_put_qi (regs.r[1], regs.r[2]); regs.r[1] ++; regs.r[3] --; } cycles (2 + (int)(tx / 4) + tx % 4); break; default: abort (); } break; case RXO_stcc: if (GS2()) PD (GS ()); E1; break; case RXO_stop: PRIVILEDGED (); regs.r_psw |= FLAGBIT_I; DO_RETURN (RX_MAKE_STOPPED(0)); case RXO_sub: MATH_OP (-, 0); break; case RXO_suntil: RL(3); #ifdef CYCLE_ACCURATE tx = 0; #endif if (regs.r[3] == 0) { cycles (3); break; } switch (opcode->size) { case RX_Long: uma = get_reg (2); while (regs.r[3] != 0) { regs.r[3] --; umb = mem_get_si (get_reg (1)); regs.r[1] += 4; #ifdef CYCLE_ACCURATE tx ++; #endif if (umb == uma) break; } #ifdef CYCLE_ACCURATE cycles (3 + 3 * tx); #endif break; case RX_Word: uma = get_reg (2) & 0xffff; while (regs.r[3] != 0) { regs.r[3] --; umb = mem_get_hi (get_reg (1)); regs.r[1] += 2; #ifdef CYCLE_ACCURATE tx ++; #endif if (umb == uma) break; } #ifdef CYCLE_ACCURATE cycles (3 + 3 * (tx / 2) + 3 * (tx % 2)); #endif break; case RX_Byte: uma = get_reg (2) & 0xff; while (regs.r[3] != 0) { regs.r[3] --; umb = mem_get_qi (regs.r[1]); regs.r[1] += 1; #ifdef CYCLE_ACCURATE tx ++; #endif if (umb == uma) break; } #ifdef CYCLE_ACCURATE cycles (3 + 3 * (tx / 4) + 3 * (tx % 4)); #endif break; default: abort(); } if (uma == umb) set_zc (1, 1); else set_zc (0, ((int)uma - (int)umb) >= 0); break; case RXO_swhile: RL(3); #ifdef CYCLE_ACCURATE tx = 0; #endif if (regs.r[3] == 0) break; switch (opcode->size) { case RX_Long: uma = get_reg (2); while (regs.r[3] != 0) { regs.r[3] --; umb = mem_get_si (get_reg (1)); regs.r[1] += 4; #ifdef CYCLE_ACCURATE tx ++; #endif if (umb 
!= uma) break; } #ifdef CYCLE_ACCURATE cycles (3 + 3 * tx); #endif break; case RX_Word: uma = get_reg (2) & 0xffff; while (regs.r[3] != 0) { regs.r[3] --; umb = mem_get_hi (get_reg (1)); regs.r[1] += 2; #ifdef CYCLE_ACCURATE tx ++; #endif if (umb != uma) break; } #ifdef CYCLE_ACCURATE cycles (3 + 3 * (tx / 2) + 3 * (tx % 2)); #endif break; case RX_Byte: uma = get_reg (2) & 0xff; while (regs.r[3] != 0) { regs.r[3] --; umb = mem_get_qi (regs.r[1]); regs.r[1] += 1; #ifdef CYCLE_ACCURATE tx ++; #endif if (umb != uma) break; } #ifdef CYCLE_ACCURATE cycles (3 + 3 * (tx / 4) + 3 * (tx % 4)); #endif break; default: abort(); } if (uma == umb) set_zc (1, 1); else set_zc (0, ((int)uma - (int)umb) >= 0); break; case RXO_wait: PRIVILEDGED (); regs.r_psw |= FLAGBIT_I; DO_RETURN (RX_MAKE_STOPPED(0)); case RXO_xchg: #ifdef CYCLE_ACCURATE regs.m2m = 0; #endif v = GS (); /* This is the memory operand, if any. */ PS (GD ()); /* and this may change the address register. */ PD (v); E2; #ifdef CYCLE_ACCURATE /* all M cycles happen during xchg's cycles. */ memory_dest = 0; memory_source = 0; #endif break; case RXO_xor: LOGIC_OP (^); break; default: EXCEPTION (EX_UNDEFINED); } #ifdef CYCLE_ACCURATE regs.m2m = 0; if (memory_source) regs.m2m |= M2M_SRC; if (memory_dest) regs.m2m |= M2M_DST; regs.rt = new_rt; new_rt = -1; #endif #ifdef CYCLE_STATS if (prev_cycle_count == regs.cycle_count) { printf("Cycle count not updated! id %s\n", id_names[opcode->id]); abort (); } #endif #ifdef CYCLE_STATS if (running_benchmark) { int omap = op_lookup (opcode->op[0].type, opcode->op[1].type, opcode->op[2].type); cycles_per_id[opcode->id][omap] += regs.cycle_count - prev_cycle_count; times_per_id[opcode->id][omap] ++; times_per_pair[prev_opcode_id][po0][opcode->id][omap] ++; prev_opcode_id = opcode->id; po0 = omap; } #endif return RX_MAKE_STEPPED (); } #ifdef CYCLE_STATS void reset_pipeline_stats (void) { memset (cycles_per_id, 0, sizeof(cycles_per_id)); memset (times_per_id, 0, sizeof(times_per_id)); memory_stalls = 0; register_stalls = 0; branch_stalls = 0; branch_alignment_stalls = 0; fast_returns = 0; memset (times_per_pair, 0, sizeof(times_per_pair)); running_benchmark = 1; benchmark_start_cycle = regs.cycle_count; } void halt_pipeline_stats (void) { running_benchmark = 0; benchmark_end_cycle = regs.cycle_count; } #endif void pipeline_stats (void) { #ifdef CYCLE_STATS int i, o1; int p, p1; #endif #ifdef CYCLE_ACCURATE if (verbose == 1) { printf ("cycles: %llu\n", regs.cycle_count); return; } printf ("cycles: %13s\n", comma (regs.cycle_count)); #endif #ifdef CYCLE_STATS if (benchmark_start_cycle) printf ("bmark: %13s\n", comma (benchmark_end_cycle - benchmark_start_cycle)); printf("\n"); for (i = 0; i < N_RXO; i++) for (o1 = 0; o1 < N_MAP; o1 ++) if (times_per_id[i][o1]) printf("%13s %13s %7.2f %s %s\n", comma (cycles_per_id[i][o1]), comma (times_per_id[i][o1]), (double)cycles_per_id[i][o1] / times_per_id[i][o1], op_cache_string(o1), id_names[i]+4); printf("\n"); for (p = 0; p < N_RXO; p ++) for (p1 = 0; p1 < N_MAP; p1 ++) for (i = 0; i < N_RXO; i ++) for (o1 = 0; o1 < N_MAP; o1 ++) if (times_per_pair[p][p1][i][o1]) { printf("%13s %s %-9s -> %s %s\n", comma (times_per_pair[p][p1][i][o1]), op_cache_string(p1), id_names[p]+4, op_cache_string(o1), id_names[i]+4); } printf("\n"); printf("%13s memory stalls\n", comma (memory_stalls)); printf("%13s register stalls\n", comma (register_stalls)); printf("%13s branches taken (non-return)\n", comma (branch_stalls)); printf("%13s branch alignment stalls\n", comma 
	  (branch_alignment_stalls));
  printf("%13s fast returns\n", comma (fast_returns));
#endif
}
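
/* Illustrative only: a minimal sketch (not part of the simulator
   proper) of how a driver is expected to call into this file.
   decode_opcode () reports most outcomes through its return value,
   but fatal errors escape via longjmp on decode_jmp_buf (see
   generate_access_exception () and DO_RETURN ()), so the caller must
   arm that buffer with setjmp before every step.  decode_opcode (),
   reset_decoder (), decode_jmp_buf and the RX_STEPPED () test macro
   all appear above; the "run_until_done" name and the choice to treat
   any non-stepped status as "stop the loop" are assumptions of this
   sketch, not something defined elsewhere in the simulator.  */
#if 0
static int
run_until_done (void)
{
  int rc;

  reset_decoder ();
  for (;;)
    {
      /* Re-arm the escape hatch before decoding each instruction.  */
      rc = setjmp (decode_jmp_buf);
      if (rc == 0)
	rc = decode_opcode ();
      if (! RX_STEPPED (rc))
	return rc;	/* Exited, stopped, or hit a breakpoint.  */
    }
}
#endif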